From 3143e95353072523ff5d9c977a474a87fe3cbe57 Mon Sep 17 00:00:00 2001 From: Tobias Droste Date: Sat, 16 Jul 2011 19:40:47 +0200 Subject: [PATCH 001/600] llvmpipe: fix build with LLVM 3.0svn LLVM 3.0svn introduced a new type system. It defines a new way to create named structs and removes the (now not needed) LLVMInvalidateStructLayout function. See revision 134829 of LLVM. Signed-off-by: Tobias Droste Signed-off-by: Brian Paul --- src/gallium/auxiliary/draw/draw_llvm.c | 48 +++++++++++++++++++------- src/gallium/drivers/llvmpipe/lp_jit.c | 19 +++++++--- 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8bb87440497..996e295e4b5 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -96,7 +96,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); * Create LLVM type for struct draw_jit_texture */ static LLVMTypeRef -create_jit_texture_type(struct gallivm_state *gallivm) +create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name) { LLVMTargetDataRef target = gallivm->target; LLVMTypeRef texture_type; @@ -120,13 +120,21 @@ create_jit_texture_type(struct gallivm_state *gallivm) elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); +#if HAVE_LLVM >= 0x0300 + texture_type = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(texture_type, elem_types, + Elements(elem_types), 0); +#else texture_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, texture_type); + /* Make sure the target's struct layout cache doesn't return * stale/invalid data. 
*/ LLVMInvalidateStructLayout(gallivm->target, texture_type); +#endif LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, target, texture_type, @@ -176,7 +184,7 @@ create_jit_texture_type(struct gallivm_state *gallivm) */ static LLVMTypeRef create_jit_context_type(struct gallivm_state *gallivm, - LLVMTypeRef texture_type) + LLVMTypeRef texture_type, const char *struct_name) { LLVMTargetDataRef target = gallivm->target; LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); @@ -189,11 +197,17 @@ create_jit_context_type(struct gallivm_state *gallivm, elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */ elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_VERTEX_SAMPLERS); /* textures */ - +#if HAVE_LLVM >= 0x0300 + context_type = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(context_type, elem_types, + Elements(elem_types), 0); +#else context_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, context_type); LLVMInvalidateStructLayout(gallivm->target, context_type); +#endif LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, target, context_type, 0); @@ -215,7 +229,7 @@ create_jit_context_type(struct gallivm_state *gallivm, * Create LLVM type for struct pipe_vertex_buffer */ static LLVMTypeRef -create_jit_vertex_buffer_type(struct gallivm_state *gallivm) +create_jit_vertex_buffer_type(struct gallivm_state *gallivm, const char *struct_name) { LLVMTargetDataRef target = gallivm->target; LLVMTypeRef elem_types[3]; @@ -225,10 +239,17 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm) elem_types[1] = LLVMInt32TypeInContext(gallivm->context); elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */ +#if HAVE_LLVM >= 0x0300 + vb_type = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(vb_type, elem_types, + Elements(elem_types), 0); +#else vb_type = 
LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, vb_type); LLVMInvalidateStructLayout(gallivm->target, vb_type); +#endif LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, target, vb_type, 0); @@ -258,10 +279,17 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems) elem_types[1] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); elem_types[2] = LLVMArrayType(elem_types[1], data_elems); +#if HAVE_LLVM >= 0x0300 + vertex_header = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(vertex_header, elem_types, + Elements(elem_types), 0); +#else vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, vertex_header); LLVMInvalidateStructLayout(gallivm->target, vertex_header); +#endif /* these are bit-fields and we can't take address of them LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, @@ -284,8 +312,6 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems) target, vertex_header, DRAW_JIT_VERTEX_DATA); - LLVMAddTypeName(gallivm->module, struct_name, vertex_header); - return vertex_header; } @@ -299,19 +325,15 @@ create_jit_types(struct draw_llvm *llvm) struct gallivm_state *gallivm = llvm->gallivm; LLVMTypeRef texture_type, context_type, buffer_type, vb_type; - texture_type = create_jit_texture_type(gallivm); - LLVMAddTypeName(gallivm->module, "texture", texture_type); + texture_type = create_jit_texture_type(gallivm, "texture"); - context_type = create_jit_context_type(gallivm, texture_type); - LLVMAddTypeName(gallivm->module, "draw_jit_context", context_type); + context_type = create_jit_context_type(gallivm, texture_type, "draw_jit_context"); llvm->context_ptr_type = LLVMPointerType(context_type, 0); buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0); - LLVMAddTypeName(gallivm->module, 
"buffer", buffer_type); llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0); - vb_type = create_jit_vertex_buffer_type(gallivm); - LLVMAddTypeName(gallivm->module, "pipe_vertex_buffer", vb_type); + vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer"); llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 268f0fa034b..ce92a80721a 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -68,10 +68,17 @@ lp_jit_create_types(struct llvmpipe_context *lp) elem_types[LP_JIT_TEXTURE_BORDER_COLOR] = LLVMArrayType(LLVMFloatTypeInContext(lc), 4); +#if HAVE_LLVM >= 0x0300 + texture_type = LLVMStructCreateNamed(gallivm->context, "texture"); + LLVMStructSetBody(texture_type, elem_types, + Elements(elem_types), 0); +#else texture_type = LLVMStructTypeInContext(lc, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, "texture", texture_type); LLVMInvalidateStructLayout(gallivm->target, texture_type); +#endif LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, gallivm->target, texture_type, @@ -112,8 +119,6 @@ lp_jit_create_types(struct llvmpipe_context *lp) LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, gallivm->target, texture_type); - - LLVMAddTypeName(gallivm->module, "texture", texture_type); } /* struct lp_jit_context */ @@ -129,11 +134,19 @@ lp_jit_create_types(struct llvmpipe_context *lp) elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); +#if HAVE_LLVM >= 0x0300 + context_type = LLVMStructCreateNamed(gallivm->context, "context"); + LLVMStructSetBody(context_type, elem_types, + Elements(elem_types), 0); +#else context_type = LLVMStructTypeInContext(lc, elem_types, Elements(elem_types), 0); LLVMInvalidateStructLayout(gallivm->target, context_type); + LLVMAddTypeName(gallivm->module, "context", context_type); +#endif + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, 
constants, gallivm->target, context_type, LP_JIT_CTX_CONSTANTS); @@ -155,8 +168,6 @@ lp_jit_create_types(struct llvmpipe_context *lp) LP_CHECK_STRUCT_SIZE(struct lp_jit_context, gallivm->target, context_type); - LLVMAddTypeName(gallivm->module, "context", context_type); - lp->jit_context_ptr_type = LLVMPointerType(context_type, 0); } From b38c26f19f529a0d0b90524cab944cf2c8a3f560 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 08:42:46 -0600 Subject: [PATCH 002/600] llvmpipe: include LLVM version number in name string --- src/gallium/drivers/llvmpipe/lp_screen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 4b2ae1436ea..e3f8c19679f 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -93,7 +93,9 @@ llvmpipe_get_vendor(struct pipe_screen *screen) static const char * llvmpipe_get_name(struct pipe_screen *screen) { - return "llvmpipe"; + static char buf[100]; + snprintf(buf, sizeof(buf), "llvmpipe (LLVM 0x%x)", HAVE_LLVM); + return buf; } From fb5ff51f422e1718c09da01f3c5bb5baecc9d68e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 19 Jul 2011 12:20:14 -0700 Subject: [PATCH 003/600] i965: Fix regression in 29a911c50e4443dfebef0a2e32c39b64992fa3cc. The previous define was the full 32-bit header, while the new define was just the top 16 bits. 
--- src/mesa/drivers/dri/i965/brw_misc_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 03cebbb824b..f7e6e7c81d1 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -46,7 +46,7 @@ static void upload_drawing_rect(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); /* xmin, ymin */ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | ((ctx->DrawBuffer->Height - 1) << 16)); From f7dbcba280e4397cadb14f230aa925b4143cdde4 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 18 Jul 2011 00:37:45 -0700 Subject: [PATCH 004/600] intel: Fix stencil buffer to be W tiled Until now, the stencil buffer was allocated as a Y tiled buffer, because in several locations the PRM states that it is. However, it is actually W tiled. From the PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Format: W-Major Tile Format is used for separate stencil. The GTT is incapable of W fencing, so we allocate the stencil buffer with I915_TILING_NONE and decode the tile's layout in software. This fix touches the following portions of code: - In intel_allocate_renderbuffer_storage(), allocate the stencil buffer with I915_TILING_NONE. - In intel_verify_dri2_has_hiz(), verify that the stencil buffer is not tiled. - In the stencil buffer's span functions, the tile's layout must be decoded in software. 
This commit mutually depends on the xf86-video-intel commit dri: Do not tile stencil buffer Author: Chad Versace Date: Mon Jul 18 00:38:00 2011 -0700 On Gen6 with separate stencil enabled, fixes the following Piglit tests: bugs/fdo23670-drawpix_stencil general/stencil-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-readpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-readpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-readpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-readpixels spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-copypixels spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-readpixels spec/EXT_packed_depth_stencil/readpixels-24_8 Note: This is a candidate for the 7.11 branch. 
Signed-off-by: Chad Versace Reviewed-by: Eric Anholt Reviewed-by: Ian Romanick Acked-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_clear.c | 6 ++ src/mesa/drivers/dri/intel/intel_context.c | 9 ++- src/mesa/drivers/dri/intel/intel_fbo.c | 12 +-- src/mesa/drivers/dri/intel/intel_screen.h | 9 ++- src/mesa/drivers/dri/intel/intel_span.c | 88 ++++++++++++++++------ 5 files changed, 93 insertions(+), 31 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index dfca03c14bf..5ab98734cfc 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask) */ tri_mask |= BUFFER_BIT_STENCIL; } + else if (intel->has_separate_stencil && + stencilRegion->tiling == I915_TILING_NONE) { + /* The stencil buffer is actually W tiled, which the hardware + * cannot blit to. */ + tri_mask |= BUFFER_BIT_STENCIL; + } else { /* clearing all stencil bits, use blitting */ blit_mask |= BUFFER_BIT_STENCIL; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 2ba13632569..fe8be082dfc 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, assert(stencil_rb->Base.Format == MESA_FORMAT_S8); assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24); - if (stencil_rb->region->tiling == I915_TILING_Y) { + if (stencil_rb->region->tiling == I915_TILING_NONE) { + /* + * The stencil buffer is actually W tiled. The region's tiling is + * I915_TILING_NONE, however, because the GTT is incapable of W + * fencing. 
+ */ intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE; return; } else { @@ -1527,7 +1532,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, * Presently, however, no verification or clean up is necessary, and * execution should not reach here. If the framebuffer still has a hiz * region, then we have already set dri2_has_hiz to true after - * confirming above that the stencil buffer is Y tiled. + * confirming above that the stencil buffer is W tiled. */ assert(0); } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 55bcc757873..35be3257ab3 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer if (irb->Base.Format == MESA_FORMAT_S8) { /* + * The stencil buffer is W tiled. However, we request from the kernel a + * non-tiled buffer because the GTT is incapable of W fencing. + * * The stencil buffer has quirky pitch requirements. From Vol 2a, * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": * The pitch must be set to 2x the value computed based on width, as @@ -180,14 +183,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer * To accomplish this, we resort to the nasty hack of doubling the drm * region's cpp and halving its height. * - * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt() - * maps the memory incorrectly. + * If we neglect to double the pitch, then render corruption occurs. 
*/ irb->region = intel_region_alloc(intel->intelScreen, - I915_TILING_Y, + I915_TILING_NONE, cpp * 2, - width, - height / 2, + ALIGN(width, 64), + ALIGN((height + 1) / 2, 64), GL_TRUE); if (!irb->region) return false; diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index b2013af1a29..9dd6a525566 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -63,9 +63,12 @@ * x8_z24 and s8). * * Eventually, intel_update_renderbuffers() makes a DRI2 request for - * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled, - * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if - * nothing happend. + * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is + * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to + * true and continue as if nothing happend. + * + * [1] The stencil buffer is actually W tiled. However, we request from the + * kernel a non-tiled buffer because the GTT is incapable of W fencing. * * If the buffers are X tiled, however, the handshake has failed and we must * clean up. diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 153803fba09..2e1c80c4766 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel, int miny = 0; \ int maxx = rb->Width; \ int maxy = rb->Height; \ - int stride = rb->RowStride; \ - uint8_t *buf = rb->Data; \ + \ + /* \ + * Here we ignore rb->Data and rb->RowStride as set by \ + * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \ + * manually, the region's *real* base address and stride is \ + * required. 
\ + */ \ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ + uint8_t *buf = irb->region->buffer->virtual; \ + unsigned stride = irb->region->pitch; \ + unsigned height = 2 * irb->region->height; \ + bool flip = rb->Name == 0; \ + int y_scale = flip ? -1 : 1; \ + int y_bias = flip ? (height - 1) : 0; \ -/* Don't flip y. */ #undef Y_FLIP -#define Y_FLIP(y) y +#define Y_FLIP(y) (y_scale * (y) + y_bias) /** * \brief Get pointer offset into stencil buffer. * - * The stencil buffer interleaves two rows into one. Yay for crazy hardware. - * The table below demonstrates how the pointer arithmetic behaves for a buffer - * with positive stride (s=stride). - * - * x | y | byte offset - * -------------------------- - * 0 | 0 | 0 - * 0 | 1 | 1 - * 1 | 0 | 2 - * 1 | 1 | 3 - * ... | ... | ... - * 0 | 2 | s - * 0 | 3 | s + 1 - * 1 | 2 | s + 2 - * 1 | 3 | s + 3 + * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we + * must decode the tile's layout in software. * + * See + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile + * Format. + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm * + * Even though the returned offset is always positive, the return type is + * signed due to + * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137 + * mesa: Fix return type of _mesa_get_format_bytes() (#37351) */ static inline intptr_t -intel_offset_S8(int stride, GLint x, GLint y) +intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y) { - return 2 * ((y / 2) * stride + x) + y % 2; + uint32_t tile_size = 4096; + uint32_t tile_width = 64; + uint32_t tile_height = 64; + uint32_t row_size = 64 * stride; + + uint32_t tile_x = x / tile_width; + uint32_t tile_y = y / tile_height; + + /* The byte's address relative to the tile's base addres. 
*/ + uint32_t byte_x = x % tile_width; + uint32_t byte_y = y % tile_height; + + uintptr_t u = tile_y * row_size + + tile_x * tile_size + + 512 * (byte_x / 8) + + 64 * (byte_y / 8) + + 32 * ((byte_y / 4) % 2) + + 16 * ((byte_x / 4) % 2) + + 8 * ((byte_y / 2) % 2) + + 4 * ((byte_x / 2) % 2) + + 2 * (byte_y % 2) + + 1 * (byte_x % 2); + + /* + * Errata for Gen5: + * + * An additional offset is needed which is not documented in the PRM. + * + * if ((byte_x / 8) % 2 == 1) { + * if ((byte_y / 8) % 2) == 0) { + * u += 64; + * } else { + * u -= 64; + * } + * } + * + * The offset is expressed more tersely as + * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1)); + */ + + return u; } #define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src; From 0f20e2e18f902b4319851643e1775a18c2aacb3d Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 18 Jul 2011 00:42:27 +0200 Subject: [PATCH 005/600] glx: Avoid calling __glXInitialize() in driReleaseDrawables(). This fixes a regression introduced by commit a26121f37530619610a78a5fbe5ef87e44047fda (fd.o bug #39219). Since the __glXInitialize() call should be unnecessary anyway, this is probably a nicer fix for the original problem too. NOTE: This is a candidate for the 7.10 and 7.11 branches. 
Signed-off-by: Henri Verbeet Reviewed-by: Ian Romanick Tested-by: padfoot@exemail.com.au --- src/glx/dri_common.c | 2 +- src/glx/glxext.c | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c index bac0c9e5911..e7dba5a68de 100644 --- a/src/glx/dri_common.c +++ b/src/glx/dri_common.c @@ -388,7 +388,7 @@ driFetchDrawable(struct glx_context *gc, GLXDrawable glxDrawable) _X_HIDDEN void driReleaseDrawables(struct glx_context *gc) { - struct glx_display *const priv = __glXInitialize(gc->psc->dpy); + const struct glx_display *priv = gc->psc->display; __GLXDRIdrawable *pdraw; if (priv == NULL) diff --git a/src/glx/glxext.c b/src/glx/glxext.c index 8704c484f96..8254544d1c0 100644 --- a/src/glx/glxext.c +++ b/src/glx/glxext.c @@ -260,25 +260,20 @@ glx_display_free(struct glx_display *priv) static int __glXCloseDisplay(Display * dpy, XExtCodes * codes) { - struct glx_display *priv, **prev, *next; + struct glx_display *priv, **prev; _XLockMutex(_Xglobal_lock); prev = &glx_displays; for (priv = glx_displays; priv; prev = &priv->next, priv = priv->next) { if (priv->dpy == dpy) { + *prev = priv->next; break; } } - - /* Only remove the display from the list after it's destroyed. The cleanup - * code (e.g. driReleaseDrawables()) ends up calling __glXInitialize(), - * which would create a new glx_display while we're trying to destroy this - * one. 
*/ - next = priv->next; - glx_display_free(priv); - *prev = next; _XUnlockMutex(_Xglobal_lock); + glx_display_free(priv); + return 1; } From f0e306c3430e4d8f6c8e085537807007a488f1e2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 15:24:47 -0600 Subject: [PATCH 006/600] mesa: update, shorten some comments in dd.h --- src/mesa/main/dd.h | 51 ++++++++++++---------------------------------- 1 file changed, 13 insertions(+), 38 deletions(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 9fe6d527f92..e1ae30fe4d4 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -189,31 +189,22 @@ struct dd_function_table { /*@{*/ /** - * Choose texture format. - * - * This is called by the \c _mesa_store_tex[sub]image[123]d() fallback - * functions. The driver should examine \p internalFormat and return a - * gl_format value. + * Choose actual hardware texture format given the user-provided source + * image format and type and the desired internal format. In some + * cases, srcFormat and srcType can be GL_NONE. + * Called by glTexImage(), etc. */ GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, GLenum srcFormat, GLenum srcType ); /** - * Called by glTexImage1D(). - * - * \param target user specified. - * \param format user specified. - * \param type user specified. - * \param pixels user specified. - * \param packing indicates the image packing of pixels. + * Called by glTexImage1D(). Simply copy the source texture data into the + * destination texture memory. The gl_texture_image fields, etc. will be + * fully initialized. + * The parameters are the same as glTexImage1D(), plus: + * \param packing describes how to unpack the source data. * \param texObj is the target texture object. - * \param texImage is the target texture image. It will have the texture \p - * width, \p height, \p depth, \p border and \p internalFormat information. 
- * - * \p retainInternalCopy is returned by this function and indicates whether - * core Mesa should keep an internal copy of the texture image. - * - * Drivers should call a fallback routine from texstore.c if needed. + * \param texImage is the target texture image. */ void (*TexImage1D)( struct gl_context *ctx, GLenum target, GLint level, GLint internalFormat, @@ -250,25 +241,9 @@ struct dd_function_table { struct gl_texture_image *texImage ); /** - * Called by glTexSubImage1D(). - * - * \param target user specified. - * \param level user specified. - * \param xoffset user specified. - * \param yoffset user specified. - * \param zoffset user specified. - * \param width user specified. - * \param height user specified. - * \param depth user specified. - * \param format user specified. - * \param type user specified. - * \param pixels user specified. - * \param packing indicates the image packing of pixels. - * \param texObj is the target texture object. - * \param texImage is the target texture image. It will have the texture \p - * width, \p height, \p border and \p internalFormat information. - * - * The driver should use a fallback routine from texstore.c if needed. + * Called by glTexSubImage1D(). Replace a subset of the target texture + * with new texel data. + * \sa dd_function_table::TexImage1D. */ void (*TexSubImage1D)( struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLsizei width, From d84791a72b33f96fab54ff2399e8053c50205454 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Tue, 19 Jul 2011 15:25:32 -0600 Subject: [PATCH 007/600] st/mesa: fix the texture format in st_context_teximage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1a339b6c71ebab6e1a64f05b2e133022d3bbcd15 made st_ChooseTextureFormat map GL_RGBA with type GL_UNSIGNED_BYTE to PIPE_FORMAT_A8B8G8R8_UNORM. The image format for ARGB pixmaps is PIPE_FORMAT_B8G8R8A8_UNORM however. 
This mismatch caused the texture to be recreated in st_finalize_texture. NOTE: This is a candidate for the 7.11 branch. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39209 Signed-off-by: Fredrik Höglund Reviewed-by: Stéphane Marchesin Signed-off-by: Brian Paul --- src/mesa/state_tracker/st_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 7bd82aae206..d5228d387f7 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -587,7 +587,7 @@ st_context_teximage(struct st_context_iface *stctxi, internalFormat = GL_RGB; texFormat = st_ChooseTextureFormat(ctx, internalFormat, - GL_RGBA, GL_UNSIGNED_BYTE); + GL_BGRA, GL_UNSIGNED_BYTE); _mesa_init_teximage_fields(ctx, target, texImage, tex->width0, tex->height0, 1, 0, From 496bf3822a724127b2632596dc45648fdeda0afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Lind?= Date: Tue, 19 Jul 2011 22:52:20 +0200 Subject: [PATCH 008/600] Make it possible to use gbm with c++ NOTE: This is a candidate for 7.11 --- src/gbm/main/gbm.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h index d79a03e4b3f..05d2292dc75 100644 --- a/src/gbm/main/gbm.h +++ b/src/gbm/main/gbm.h @@ -28,6 +28,11 @@ #ifndef _GBM_H_ #define _GBM_H_ +#ifdef __cplusplus +extern "C" { +#endif + + #define __GBM__ 1 #include @@ -97,4 +102,8 @@ gbm_bo_get_handle(struct gbm_bo *bo); void gbm_bo_destroy(struct gbm_bo *bo); +#ifdef __cplusplus +} +#endif + #endif From 5874890c26f434f54e9218b83fae4eb8175c24e9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: [PATCH 009/600] mesa: stop using ctx->Driver.CopyTexImage1D/2D() hooks --- src/mesa/main/teximage.c | 51 ++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index
6f53686e7ff..302fd65cb27 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -2797,29 +2797,43 @@ copyteximage(struct gl_context *ctx, GLuint dims, _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } else { - gl_format texFormat; - - if (texImage->Data) { - ctx->Driver.FreeTexImageData( ctx, texImage ); - } - - ASSERT(texImage->Data == NULL); - - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); + /* choose actual hw format */ + gl_format texFormat = _mesa_choose_texture_format(ctx, texObj, + target, level, + internalFormat, + GL_NONE, GL_NONE); if (legal_texture_size(ctx, texFormat, width, height, 1)) { + GLint srcX = x, srcY = y, dstX = 0, dstY = 0; + + /* Free old texture image */ + ctx->Driver.FreeTexImageData(ctx, texImage); + _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, border, internalFormat, texFormat); - ASSERT(ctx->Driver.CopyTexImage2D); - if (dims == 1) - ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat, - x, y, width, border); - else - ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat, - x, y, width, height, border); + /* Allocate texture memory (no pixel data yet) */ + if (dims == 1) { + ctx->Driver.TexImage1D(ctx, target, level, internalFormat, + width, border, GL_NONE, GL_NONE, NULL, + &ctx->Unpack, texObj, texImage); + } + else { + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, GL_NONE, GL_NONE, + NULL, &ctx->Unpack, texObj, texImage); + } + + if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY, + &width, &height)) { + if (dims == 1) + ctx->Driver.CopyTexSubImage1D(ctx, target, level, dstX, + srcX, srcY, width); + + else + ctx->Driver.CopyTexSubImage2D(ctx, target, level, dstX, dstY, + srcX, srcY, width, height); + } check_gen_mipmap(ctx, target, texObj, level); @@ -2830,6 +2844,7 @@ copyteximage(struct gl_context *ctx, GLuint dims, ctx->NewState |= _NEW_TEXTURE; } 
else { + /* probably too large of image */ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } } From 1da28fa959e80610ebc9b7a28bfb83e3cad3aee4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: [PATCH 010/600] mesa: remove comments referring to Driver.TexImage1D/2D --- src/mesa/main/texstore.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 6e1e63bdfb0..c4aeaa8f16d 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -4577,8 +4577,7 @@ texture_row_stride(const struct gl_texture_image *texImage) /** - * This is the software fallback for Driver.TexImage1D() - * and Driver.CopyTexImage1D(). + * This is the software fallback for Driver.TexImage1D(). * \sa _mesa_store_teximage2d() */ void @@ -4629,8 +4628,7 @@ _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level, /** - * This is the software fallback for Driver.TexImage2D() - * and Driver.CopyTexImage2D(). + * This is the software fallback for Driver.TexImage2D(). * * This function is oriented toward storing images in main memory, rather * than VRAM. Device driver's can easily plug in their own replacement. @@ -4684,8 +4682,7 @@ _mesa_store_teximage2d(struct gl_context *ctx, GLenum target, GLint level, /** - * This is the software fallback for Driver.TexImage3D() - * and Driver.CopyTexImage3D(). + * This is the software fallback for Driver.TexImage3D(). 
* \sa _mesa_store_teximage2d() */ void From fbe6836043dff2798571b838096ed59c60ec4438 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: [PATCH 011/600] intel: remove intelCopyTexImage1D/2D() --- src/mesa/drivers/dri/intel/intel_tex_copy.c | 97 --------------------- 1 file changed, 97 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 1a3643da593..e89e91dee3e 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -163,101 +163,6 @@ intel_copy_texsubimage(struct intel_context *intel, } -static void -intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty, height; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. 
- */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - height = 1; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y, - width, border); -} - - -static void -intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. 
- */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - - static void intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) @@ -312,8 +217,6 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level, void intelInitTextureCopyImageFuncs(struct dd_function_table *functions) { - functions->CopyTexImage1D = intelCopyTexImage1D; - functions->CopyTexImage2D = intelCopyTexImage2D; functions->CopyTexSubImage1D = intelCopyTexSubImage1D; functions->CopyTexSubImage2D = intelCopyTexSubImage2D; } From 9ed87c4463cf265b06566d15ba86bf20661c70de Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: [PATCH 012/600] radeon: remove radeonCopyTexImage2D() --- src/mesa/drivers/dri/r200/r200_tex.c | 1 - src/mesa/drivers/dri/r300/r300_tex.c | 1 - src/mesa/drivers/dri/r600/evergreen_tex.c | 1 - src/mesa/drivers/dri/r600/r600_tex.c | 1 - src/mesa/drivers/dri/radeon/radeon_tex.c | 1 - src/mesa/drivers/dri/radeon/radeon_tex_copy.c | 55 ------------------- src/mesa/drivers/dri/radeon/radeon_texture.h | 5 -- 7 files changed, 65 deletions(-) diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index d42e8f12041..91e77f9f7da 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -527,7 +527,6 @@ void 
r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 590d9afe14a..93d8fe185ef 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -379,7 +379,6 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c index 33a5f277683..9784a8484f2 100644 --- a/src/mesa/drivers/dri/r600/evergreen_tex.c +++ b/src/mesa/drivers/dri/r600/evergreen_tex.c @@ -1688,7 +1688,6 @@ void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index eb7ed30c7a3..3efa1d197fa 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -470,7 +470,6 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c 
b/src/mesa/drivers/dri/radeon/radeon_tex.c index 25a8ddf7b6a..a0b5506ae76 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -455,7 +455,6 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table * functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index f14dfa25d40..94ff3c4a727 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -140,61 +140,6 @@ do_copy_texsubimage(struct gl_context *ctx, dstx, dsty, width, height, flip_y); } -void -radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - radeon_prepare_render(radeon); - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. 
- */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) { - return; - } - - if (!do_copy_texsubimage(ctx, target, level, - radeon_tex_obj(texObj), (radeon_texture_image *)texImage, - 0, 0, x, y, width, height)) { - goto fail; - } - - return; - -fail: - radeon_print(RADEON_FALLBACKS, RADEON_NORMAL, - "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n", - _mesa_lookup_enum_by_nr(internalFormat), border); - - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index 538a07fbba8..6fc06d967dd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -126,11 +126,6 @@ void radeonGetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint le struct gl_texture_object *texObj, struct gl_texture_image *texImage); -void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border); - void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, From 0823ef84a5c3a6332ea76d0001febf6aaa440dc3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: [PATCH 013/600] st/mesa: remove st_CopyTexImage1D/2D() --- src/mesa/state_tracker/st_cb_texture.c | 55 -------------------------- 1 file changed, 55 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c index 6907cfc03cf..83e83695aae 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1609,59 +1609,6 @@ st_copy_texsubimage(struct gl_context *ctx, -static void -st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - - /* Setup or redefine the texture object, texture and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - st_copy_texsubimage(ctx, target, level, - 0, 0, 0, /* destX,Y,Z */ - x, y, width, 1); /* src X, Y, size */ -} - - -static void -st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - - /* Setup or redefine the texture object, texture and texture - * image. Don't populate yet. 
- */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - st_copy_texsubimage(ctx, target, level, - 0, 0, 0, /* destX,Y,Z */ - x, y, width, height); /* src X, Y, size */ -} - - static void st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) @@ -1947,8 +1894,6 @@ st_init_texture_functions(struct dd_function_table *functions) functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D; functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D; functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D; - functions->CopyTexImage1D = st_CopyTexImage1D; - functions->CopyTexImage2D = st_CopyTexImage2D; functions->CopyTexSubImage1D = st_CopyTexSubImage1D; functions->CopyTexSubImage2D = st_CopyTexSubImage2D; functions->CopyTexSubImage3D = st_CopyTexSubImage3D; From 774311fb5403e3da7ff0197199ffad8f34089e6a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: [PATCH 014/600] meta: remove _mesa_meta_CopyTexImage1D/2D() --- src/mesa/drivers/common/driverfuncs.c | 2 - src/mesa/drivers/common/meta.c | 113 -------------------------- src/mesa/drivers/common/meta.h | 10 --- 3 files changed, 125 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 8ab129dd73d..76630264bf7 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -95,8 +95,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->TexSubImage2D = _mesa_store_texsubimage2d; driver->TexSubImage3D = _mesa_store_texsubimage3d; driver->GetTexImage = _mesa_get_teximage; - driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D; - driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D; driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D; driver->CopyTexSubImage2D = 
_mesa_meta_CopyTexSubImage2D; driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D; diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 0e58aeca3f5..706239c7736 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -2677,119 +2677,6 @@ get_temp_image_type(struct gl_context *ctx, GLenum baseFormat) } -/** - * Helper for _mesa_meta_CopyTexImage1/2D() functions. - * Have to be careful with locking and meta state for pixel transfer. - */ -static void -copy_tex_image(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border) -{ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - GLenum format, type; - GLint bpp; - void *buf; - struct gl_renderbuffer *read_rb = ctx->ReadBuffer->_ColorReadBuffer; - - texObj = _mesa_get_current_tex_object(ctx, target); - texImage = _mesa_get_tex_image(ctx, texObj, target, level); - - /* Choose format/type for temporary image buffer */ - format = _mesa_base_tex_format(ctx, internalFormat); - - if (format == GL_LUMINANCE && - _mesa_get_format_base_format(read_rb->Format) != GL_LUMINANCE) { - /* The glReadPixels() path will convert RGB to luminance by - * summing R+G+B. glCopyTexImage() is supposed to behave as - * glCopyPixels, which doesn't do that change, and instead - * leaves it up to glTexImage which converts RGB to luminance by - * just taking the R channel. To avoid glReadPixels() trashing - * our data, use RGBA for our temporary image. 
- */ - format = GL_RGBA; - } - - type = get_temp_image_type(ctx, format); - bpp = _mesa_bytes_per_pixel(format, type); - if (bpp <= 0) { - _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()"); - return; - } - - /* - * Alloc image buffer (XXX could use a PBO) - */ - buf = malloc(width * height * bpp); - if (!buf) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); - return; - } - - _mesa_unlock_texture(ctx, texObj); /* need to unlock first */ - - /* - * Read image from framebuffer (disable pixel transfer ops) - */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); - ctx->Driver.ReadPixels(ctx, x, y, width, height, - format, type, &ctx->Pack, buf); - _mesa_meta_end(ctx); - - if (texImage->Data) { - ctx->Driver.FreeTexImageData(ctx, texImage); - } - - /* The texture's format was already chosen in _mesa_CopyTexImage() */ - ASSERT(texImage->TexFormat != MESA_FORMAT_NONE); - - /* - * Store texture data (with pixel transfer ops) - */ - _mesa_meta_begin(ctx, META_PIXEL_STORE); - - _mesa_update_state(ctx); /* to update pixel transfer state */ - - if (target == GL_TEXTURE_1D) { - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, format, type, - buf, &ctx->Unpack, texObj, texImage); - } - else { - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, format, type, - buf, &ctx->Unpack, texObj, texImage); - } - _mesa_meta_end(ctx); - - _mesa_lock_texture(ctx, texObj); /* re-lock */ - - free(buf); -} - - -void -_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border) -{ - copy_tex_image(ctx, 1, target, level, internalFormat, x, y, - width, 1, border); -} - - -void -_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border) -{ - copy_tex_image(ctx, 2, target, level, internalFormat, x, y, - 
width, height, border); -} - - - /** * Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions. * Have to be careful with locking and meta state for pixel transfer. diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index b0797d3d91a..7190dee768a 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -68,16 +68,6 @@ extern void _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, struct gl_texture_object *texObj); -extern void -_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border); - -extern void -_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border); - extern void _mesa_meta_CopyTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, From 1c1fc62e388534b6c0751fc9f8ab34a89e25efd0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: [PATCH 015/600] mesa: remove unused dd_function_table::CopyTexImage1D/2D() hooks --- src/mesa/main/dd.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index e1ae30fe4d4..e0c5844e193 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -289,24 +289,6 @@ struct dd_function_table { struct gl_texture_object *texObj, struct gl_texture_image *texImage ); - /** - * Called by glCopyTexImage1D(). - * - * Drivers should use a fallback routine from texstore.c if needed. - */ - void (*CopyTexImage1D)( struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border ); - - /** - * Called by glCopyTexImage2D(). - * - * Drivers should use a fallback routine from texstore.c if needed. 
- */ - void (*CopyTexImage2D)( struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border ); - /** * Called by glCopyTexSubImage1D(). * From d5e32397762a3bd55fa69ad6332351512083f9c6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: [PATCH 016/600] st/mesa: get rid of redundant clipping code in st_copy_texsubimage() --- src/mesa/state_tracker/st_cb_texture.c | 28 -------------------------- 1 file changed, 28 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 83e83695aae..25f08aa4d09 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1466,34 +1466,6 @@ st_copy_texsubimage(struct gl_context *ctx, depth/stencil samples per pixel? Need some transfer clarifications. */ assert(sample_count < 2); - if (srcX < 0) { - width -= -srcX; - destX += -srcX; - srcX = 0; - } - - if (srcY < 0) { - height -= -srcY; - destY += -srcY; - srcY = 0; - } - - if (destX < 0) { - width -= -destX; - srcX += -destX; - destX = 0; - } - - if (destY < 0) { - height -= -destY; - srcY += -destY; - destY = 0; - } - - if (width < 0 || height < 0) - return; - - assert(strb); assert(strb->surface); assert(stImage->pt); From 4470ff2ebf56b22421038bc7272ef22c085b839d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 21:10:25 -0600 Subject: [PATCH 017/600] glsl: silence warning in linker.cpp --- src/glsl/linker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 34b64837a46..5ec08446d16 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1343,7 +1343,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog, foreach_list(node, sh->ir) { ir_variable *const var = ((ir_instruction *) node)->as_variable(); - if ((var == NULL) || (var->mode != direction)) + if ((var == NULL) || (var->mode 
!= (unsigned) direction)) continue; if (var->explicit_location) { From 8d044047f133ad4e4c4f1f6b4a4a1c4a9fa477f5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 21:11:53 -0600 Subject: [PATCH 018/600] mesa: remove depend files from tarballs --- Makefile | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index b0a2d8065f6..817f3d3706a 100644 --- a/Makefile +++ b/Makefile @@ -203,12 +203,6 @@ EXTRA_FILES = \ IGNORE_FILES = \ -x autogen.sh -DEPEND_FILES = \ - src/mesa/depend \ - src/glx/depend \ - src/glw/depend \ - src/glu/sgi/depend - parsers: configure -@touch $(TOP)/configs/current @@ -231,15 +225,9 @@ AUTOCONF = autoconf AC_FLAGS = aclocal.m4: configure.ac acinclude.m4 $(ACLOCAL) $(ACLOCAL_FLAGS) -configure: rm_depend configure.ac aclocal.m4 acinclude.m4 +configure: configure.ac aclocal.m4 acinclude.m4 $(AUTOCONF) $(AC_FLAGS) -rm_depend: - @for dep in $(DEPEND_FILES) ; do \ - rm -f $$dep ; \ - touch $$dep ; \ - done - manifest.txt: .git ( \ ls -1 $(EXTRA_FILES) ; \ @@ -269,4 +257,4 @@ md5: $(ARCHIVES) @-md5sum $(PACKAGE_NAME).tar.bz2 @-md5sum $(PACKAGE_NAME).zip -.PHONY: tarballs rm_depend md5 +.PHONY: tarballs md5 From 3875526926123259521514de9c8d675e3797275a Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 15 Jul 2011 02:27:49 -0700 Subject: [PATCH 019/600] glsl: Avoid massive ralloc_strndup overhead in S-Expression parsing. When parsing S-Expressions, we need to store nul-terminated strings for Symbol nodes. Prior to this patch, we called ralloc_strndup each time we constructed a new s_symbol. It turns out that this is obscenely expensive. Instead, copy the whole buffer before parsing and overwrite it to contain \0 bytes at the appropriate locations. Since atoms are separated by whitespace, (), or ;, we can safely overwrite the character after a Symbol. While much of the buffer may be unused, copying the whole buffer is simple and guaranteed to provide enough space. 
Prior to this, running piglit-run.py -t glsl tests/quick.tests with GLSL 1.30 enabled took just over 10 minutes on my machine. Now it takes 5. NOTE: This is a candidate for stable release branches (because it will make running comparison tests so much less irritating.) Signed-off-by: Kenneth Graunke --- src/glsl/s_expression.cpp | 94 +++++++++++++++++++++++++-------------- src/glsl/s_expression.h | 2 +- 2 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/glsl/s_expression.cpp b/src/glsl/s_expression.cpp index a922a50d3b9..e704a3be20d 100644 --- a/src/glsl/s_expression.cpp +++ b/src/glsl/s_expression.cpp @@ -25,10 +25,13 @@ #include #include "s_expression.h" -s_symbol::s_symbol(const char *tmp, size_t n) +s_symbol::s_symbol(const char *str, size_t n) { - this->str = ralloc_strndup (this, tmp, n); - assert(this->str != NULL); + /* Assume the given string is already nul-terminated and in memory that + * will live as long as this node. + */ + assert(str[n] == '\0'); + this->str = str; } s_list::s_list() @@ -36,22 +39,26 @@ s_list::s_list() } static void -skip_whitespace(const char *& src) +skip_whitespace(const char *&src, char *&symbol_buffer) { - src += strspn(src, " \v\t\r\n"); + size_t n = strspn(src, " \v\t\r\n"); + src += n; + symbol_buffer += n; /* Also skip Scheme-style comments: semi-colon 'til end of line */ if (src[0] == ';') { - src += strcspn(src, "\n"); - skip_whitespace(src); + n = strcspn(src, "\n"); + src += n; + symbol_buffer += n; + skip_whitespace(src, symbol_buffer); } } static s_expression * -read_atom(void *ctx, const char *& src) +read_atom(void *ctx, const char *&src, char *&symbol_buffer) { s_expression *expr = NULL; - skip_whitespace(src); + skip_whitespace(src, symbol_buffer); size_t n = strcspn(src, "( \v\t\r\n);"); if (n == 0) @@ -70,42 +77,63 @@ read_atom(void *ctx, const char *& src) expr = new(ctx) s_int(i); } else { // Not a number; return a symbol. 
- expr = new(ctx) s_symbol(src, n); + symbol_buffer[n] = '\0'; + expr = new(ctx) s_symbol(symbol_buffer, n); } src += n; + symbol_buffer += n; return expr; } +static s_expression * +__read_expression(void *ctx, const char *&src, char *&symbol_buffer) +{ + s_expression *atom = read_atom(ctx, src, symbol_buffer); + if (atom != NULL) + return atom; + + skip_whitespace(src, symbol_buffer); + if (src[0] == '(') { + ++src; + ++symbol_buffer; + + s_list *list = new(ctx) s_list; + s_expression *expr; + + while ((expr = __read_expression(ctx, src, symbol_buffer)) != NULL) { + list->subexpressions.push_tail(expr); + } + skip_whitespace(src, symbol_buffer); + if (src[0] != ')') { + printf("Unclosed expression (check your parenthesis).\n"); + return NULL; + } + ++src; + ++symbol_buffer; + return list; + } + return NULL; +} + s_expression * s_expression::read_expression(void *ctx, const char *&src) { assert(src != NULL); - s_expression *atom = read_atom(ctx, src); - if (atom != NULL) - return atom; - - skip_whitespace(src); - if (src[0] == '(') { - ++src; - - s_list *list = new(ctx) s_list; - s_expression *expr; - - while ((expr = read_expression(ctx, src)) != NULL) { - list->subexpressions.push_tail(expr); - } - skip_whitespace(src); - if (src[0] != ')') { - printf("Unclosed expression (check your parenthesis).\n"); - return NULL; - } - ++src; - return list; - } - return NULL; + /* When we encounter a Symbol, we need to save a nul-terminated copy of + * the string. However, ralloc_strndup'ing every individual Symbol is + * extremely expensive. We could avoid this by simply overwriting the + * next character (guaranteed to be whitespace, parens, or semicolon) with + * a nul-byte. But overwriting non-whitespace would mess up parsing. + * + * So, just copy the whole buffer ahead of time. Walk both, leaving the + * original source string unmodified, and altering the copy to contain the + * necessary nul-bytes whenever we encounter a symbol. 
+ */ + char *symbol_buffer = ralloc_strdup(ctx, src); + return __read_expression(ctx, src, symbol_buffer); } void s_int::print() diff --git a/src/glsl/s_expression.h b/src/glsl/s_expression.h index c9dc676b319..642af19b439 100644 --- a/src/glsl/s_expression.h +++ b/src/glsl/s_expression.h @@ -129,7 +129,7 @@ public: void print(); private: - char *str; + const char *str; }; /* Lists of expressions: (expr1 ... exprN) */ From 8082816e27a0ee376e679c4d81ff8a3f0611ea9e Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Wed, 20 Jul 2011 13:43:24 -0400 Subject: [PATCH 020/600] g3dvl: Init/clean pipe fully when a shader-based decoder isn't used. Fixes VDPAU CSC-only mode. --- src/gallium/auxiliary/vl/vl_compositor.c | 25 ++++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_compositor.h | 1 + 2 files changed, 26 insertions(+) diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 3bd4af2e3e0..faca96dc55b 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -231,6 +231,8 @@ init_pipe_state(struct vl_compositor *c) struct pipe_rasterizer_state rast; struct pipe_sampler_state sampler; struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + unsigned i; assert(c); @@ -289,6 +291,24 @@ init_pipe_state(struct vl_compositor *c) c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast); + memset(&dsa, 0, sizeof dsa); + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + for (i = 0; i < 2; ++i) { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].valuemask = 0; + dsa.stencil[i].writemask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref_value = 0; + c->dsa = c->pipe->create_depth_stencil_alpha_state(c->pipe, 
&dsa); + c->pipe->bind_depth_stencil_alpha_state(c->pipe, c->dsa); return true; } @@ -296,6 +316,11 @@ static void cleanup_pipe_state(struct vl_compositor *c) { assert(c); + /* Asserted in softpipe_delete_fs_state() for some reason */ + c->pipe->bind_vs_state(c->pipe, NULL); + c->pipe->bind_fs_state(c->pipe, NULL); + + c->pipe->delete_depth_stencil_alpha_state(c->pipe, c->dsa); c->pipe->delete_sampler_state(c->pipe, c->sampler_linear); c->pipe->delete_sampler_state(c->pipe, c->sampler_nearest); c->pipe->delete_blend_state(c->pipe, c->blend); diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index 87ad39be1be..0a9a7411a61 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -68,6 +68,7 @@ struct vl_compositor void *sampler_nearest; void *blend; void *rast; + void *dsa; void *vertex_elems_state; void *vs; From b56daf71d2f63d044d4c53ab49c6f87e02991a28 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Wed, 20 Jul 2011 13:49:56 -0400 Subject: [PATCH 021/600] g3dvl: Preserve previously rendered components for MC output. Fixes xvmc-softpipe MC entrypoint, amongst others. 
--- src/gallium/auxiliary/vl/vl_mc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index bd05205b52d..add367e3ac6 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -590,7 +590,7 @@ vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface) } static void -prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned mask) +prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned component, unsigned mask) { struct vl_mc *renderer; @@ -599,7 +599,7 @@ prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned mask) renderer = buffer->renderer; renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state); - if (buffer->surface_cleared) + if (buffer->surface_cleared || component > 0) renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add[mask]); else renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear[mask]); @@ -615,7 +615,7 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref) assert(buffer && ref); - prepare_pipe_4_rendering(buffer, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B); + prepare_pipe_4_rendering(buffer, 0, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B); renderer = buffer->renderer; @@ -643,7 +643,7 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num if (num_instances == 0) return; - prepare_pipe_4_rendering(buffer, mask); + prepare_pipe_4_rendering(buffer, component, mask); renderer = buffer->renderer; From eee570290aebc8a339acd063033e3daefcef2bc6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 17 Jul 2011 14:53:16 -0700 Subject: [PATCH 022/600] meta: Add a GLSL-based _mesa_meta_Clear() variant. This cuts out a large portion of the overhead of glClear() from resetting the texenv state and recomputing the fixed function programs. 
It also means less use of fixed function internally in our GLES2 drivers, which is rather bogus. Reviewed-by: Brian Paul --- src/mesa/drivers/common/meta.c | 160 ++++++++++++++++++++++++++++++++- src/mesa/drivers/common/meta.h | 3 + 2 files changed, 162 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 0e58aeca3f5..887118b9417 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -62,6 +62,7 @@ #include "main/teximage.h" #include "main/texparam.h" #include "main/texstate.h" +#include "main/uniforms.h" #include "main/varray.h" #include "main/viewport.h" #include "program/program.h" @@ -235,6 +236,8 @@ struct clear_state { GLuint ArrayObj; GLuint VBO; + GLuint ShaderProg; + GLint ColorLocation; }; @@ -1589,10 +1592,165 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) _mesa_meta_end(ctx); } +static void +meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear) +{ + const char *vs_source = + "attribute vec4 position;\n" + "void main()\n" + "{\n" + " gl_Position = position;\n" + "}\n"; + const char *fs_source = + "uniform vec4 color;\n" + "void main()\n" + "{\n" + " gl_FragColor = color;\n" + "}\n"; + GLuint vs, fs; + + if (clear->ArrayObj != 0) + return; + + /* create vertex array object */ + _mesa_GenVertexArrays(1, &clear->ArrayObj); + _mesa_BindVertexArray(clear->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &clear->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + + /* setup vertex arrays */ + _mesa_VertexAttribPointerARB(0, 3, GL_FLOAT, GL_FALSE, 0, (void *)0); + _mesa_EnableVertexAttribArrayARB(0); + + vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER); + _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL); + _mesa_CompileShaderARB(vs); + + fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER); + _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL); + _mesa_CompileShaderARB(fs); + + clear->ShaderProg = 
_mesa_CreateProgramObjectARB(); + _mesa_AttachShader(clear->ShaderProg, fs); + _mesa_AttachShader(clear->ShaderProg, vs); + _mesa_BindAttribLocationARB(clear->ShaderProg, 0, "position"); + _mesa_LinkProgramARB(clear->ShaderProg); + + clear->ColorLocation = _mesa_GetUniformLocationARB(clear->ShaderProg, + "color"); +} + +/** + * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering. + */ +void +_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers) +{ + struct clear_state *clear = &ctx->Meta->Clear; + GLbitfield metaSave; + const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; + struct gl_framebuffer *fb = ctx->DrawBuffer; + const float x0 = ((float)fb->_Xmin / fb->Width) * 2.0f - 1.0f; + const float y0 = ((float)fb->_Ymin / fb->Height) * 2.0f - 1.0f; + const float x1 = ((float)fb->_Xmax / fb->Width) * 2.0f - 1.0f; + const float y1 = ((float)fb->_Ymax / fb->Height) * 2.0f - 1.0f; + const float z = -invert_z(ctx->Depth.Clear); + struct vertex { + GLfloat x, y, z; + } verts[4]; + + metaSave = (META_ALPHA_TEST | + META_BLEND | + META_DEPTH_TEST | + META_RASTERIZATION | + META_SHADER | + META_STENCIL_TEST | + META_VERTEX | + META_VIEWPORT | + META_CLAMP_FRAGMENT_COLOR); + + if (!(buffers & BUFFER_BITS_COLOR)) { + /* We'll use colormask to disable color writes. Otherwise, + * respect color mask + */ + metaSave |= META_COLOR_MASK; + } + + _mesa_meta_begin(ctx, metaSave); + + meta_glsl_clear_init(ctx, clear); + + _mesa_UseProgramObjectARB(clear->ShaderProg); + _mesa_Uniform4fvARB(clear->ColorLocation, 1, + ctx->Color.ClearColorUnclamped); + + _mesa_BindVertexArray(clear->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + + /* GL_COLOR_BUFFER_BIT */ + if (buffers & BUFFER_BITS_COLOR) { + /* leave colormask, glDrawBuffer state as-is */ + + /* Clears never have the color clamped. 
*/ + _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); + } + else { + ASSERT(metaSave & META_COLOR_MASK); + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + } + + /* GL_DEPTH_BUFFER_BIT */ + if (buffers & BUFFER_BIT_DEPTH) { + _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); + _mesa_DepthFunc(GL_ALWAYS); + _mesa_DepthMask(GL_TRUE); + } + else { + assert(!ctx->Depth.Test); + } + + /* GL_STENCIL_BUFFER_BIT */ + if (buffers & BUFFER_BIT_STENCIL) { + _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE); + _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, + GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, + ctx->Stencil.Clear & stencilMax, + ctx->Stencil.WriteMask[0]); + } + else { + assert(!ctx->Stencil.Enabled); + } + + /* vertex positions */ + verts[0].x = x0; + verts[0].y = y0; + verts[0].z = z; + verts[1].x = x1; + verts[1].y = y0; + verts[1].z = z; + verts[2].x = x1; + verts[2].y = y1; + verts[2].z = z; + verts[3].x = x0; + verts[3].y = y1; + verts[3].z = z; + + /* upload new vertex data */ + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts, + GL_DYNAMIC_DRAW_ARB); + + /* draw quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_meta_end(ctx); +} /** * Meta implementation of ctx->Driver.CopyPixels() in terms - * of texture mapping and polygon rendering. + * of texture mapping and polygon rendering and GLSL shaders. 
*/ void _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY, diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index b0797d3d91a..32c789ea638 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -42,6 +42,9 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx, extern void _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers); +extern void +_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers); + extern void _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, GLsizei width, GLsizei height, From 540e66b3bebb5ae82422e386aa178147ea14a39e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 17 Jul 2011 14:55:10 -0700 Subject: [PATCH 023/600] intel: Use the GLSL-based meta clear when available. Improves firefox-talos-gfx performance under GL when 3D clears are enabled: [ 0] gl-before firefox-talos-gfx 20.193 20.251 0.27% 3/3 [ 0] gl-after firefox-talos-gfx 18.013 18.040 0.19% 3/3 --- src/mesa/drivers/dri/intel/intel_clear.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 5ab98734cfc..81c062fba53 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -188,7 +188,10 @@ intelClear(struct gl_context *ctx, GLbitfield mask) if (tri_mask) { debug_mask("tri", tri_mask); - _mesa_meta_Clear(&intel->ctx, tri_mask); + if (ctx->Extensions.ARB_fragment_shader) + _mesa_meta_glsl_Clear(&intel->ctx, tri_mask); + else + _mesa_meta_Clear(&intel->ctx, tri_mask); } } From dc7422405f6f3c201993251e4665bb9ec1b59db0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 18 Jul 2011 15:25:10 -0700 Subject: [PATCH 024/600] i965: Avoid kernel BUG_ON if we happen to wait on the pipe_control w/a BO. 
For this and occlusion queries, we're trying to avoid setting I915_GEM_DOMAIN_RENDER for the write domain, because the data written is definitely not going through the render cache, but we do need to tell the kernel that the object has been written. However, with using I915_GEM_DOMAIN_GTT, the kernel on retiring the batchbuffer sees that the w/a BO has a write domain of GTT, and puts it on the flushing list. If something tries to wait for that BO to finish rendering (such as the AUB dumper reading the contents of BOs), we get into wait_request (since obj->active) but with a 0 seqno (since the object is on the flushing list, not actually on a ringbuffer), and BUG_ONs. To avoid the kernel bug (which I'm hoping to delete soon anyway), just use I915_GEM_DOMAIN_INSTRUCTION like occlusion queries do. This doesn't result in more flushing, because we invalidate INSTRUCTION on every batchbuffer now that we're state streaming, anyway. Reviewed-by: Kenneth Graunke Tested-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index b61a2ffef19..9c97ef22888 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -325,7 +325,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel) OUT_BATCH(_3DSTATE_PIPE_CONTROL); OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); OUT_RELOC(intel->batch.workaround_bo, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0); + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); OUT_BATCH(0); /* write data */ ADVANCE_BATCH(); From 407785d0e97abd0cc51a6e360089111973748e7c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 18 Jul 2011 17:17:03 -0700 Subject: [PATCH 025/600] i965: Enable the PIPE_CONTROL workaround workaround out of paranoia. 
There's scary stuff going on in PIPE_CONTROL internals, and if the BSpec says to do this to make PIPE_CONTROL work, I'll go ahead and do it because we'll probably never be able to debug it after the fact. v2: Use stall at scoreboard instead of depth stall, as noted by Ken. --- .../drivers/dri/intel/intel_batchbuffer.c | 31 +++++++++++++++++-- src/mesa/drivers/dri/intel/intel_reg.h | 1 + 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 9c97ef22888..97cc219ce6d 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -308,12 +308,29 @@ emit: * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. * - * XXX: There is also a workaround that would appear to apply to this - * workaround, but it doesn't appear to be necessary so far: + * And the workaround for these two requires this workaround first: * - * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent * BEFORE the pipe-control with a post-sync op and no write-cache * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. 
Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. */ void intel_emit_post_sync_nonzero_flush(struct intel_context *intel) @@ -321,6 +338,14 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel) if (!intel->batch.need_workaround_flush) return; + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL); OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 5aa629150cf..a98a669af21 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -75,6 +75,7 @@ #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) #define PIPE_CONTROL_PPGTT_WRITE (0 << 2) #define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) From 3e5d36267d8c9536490c902f785137a7fa0637fc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 19 Jul 2011 15:06:15 -0700 Subject: [PATCH 026/600] i965: Apply a homebrew workaround for GPU hang in OGLC api-texcoord. The behavior of flushes in the hardware is a maze of twisty passages, and strangely the VS constants appear to be loaded during a pipeline flush instead of at the time of the packet emit according to the simulator. On moving the STATE_BASE_ADDRESS packet to where it really needed to live (in order for data loads by other packets to be correct), we sometimes no longer got a flush between those packets where we apparently needed it. This replicates the flushes implied by a STATE_BASE_ADDRESS update, fixing the GPU hangs in OGLC and the "engine" demo. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36821 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39257 Tested-by: Keith Packard (bzflag and etracer fixed) Acked-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen6_vs_state.c | 26 +++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index fb4cdbaadf9..e70454416bf 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -160,6 +160,32 @@ upload_vs_state(struct brw_context *brw) GEN6_VS_STATISTICS_ENABLE | GEN6_VS_ENABLE); ADVANCE_BATCH(); + + /* Based on my reading of the simulator, the VS constants don't get + * pulled into the VS FF unit until an appropriate pipeline flush + * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds + * references to them into a little FIFO. The flushes are common, + * but don't reliably happen between this and a 3DPRIMITIVE, causing + * the primitive to use the wrong constants. Then the FIFO + * containing the constant setup gets added to again on the next + * constants change, and eventually when a flush does happen the + * unit is overwhelmed by constant changes and dies. + * + * To avoid this, send a PIPE_CONTROL down the line that will + * update the unit immediately loading the constants. The flush + * type bits here were those set by the STATE_BASE_ADDRESS whose + * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the + * bug reports that led to this workaround, and may be more than + * what is strictly required to avoid the issue. 
+ */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); } const struct brw_tracked_state gen6_vs_state = { From 2d960d3f4e8fcf7819bb94ba4913ec0a2a723daa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 15 Jul 2011 20:16:20 +0200 Subject: [PATCH 027/600] g3dvl: remove unused vertex shader inputs See also comments in the code. --- src/gallium/auxiliary/vl/vl_idct.c | 3 +-- src/gallium/auxiliary/vl/vl_mc.c | 5 +++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index 645d06a0925..ad786145392 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -143,7 +143,7 @@ static void * create_mismatch_vert_shader(struct vl_idct *idct) { struct ureg_program *shader; - struct ureg_src vrect, vpos; + struct ureg_src vpos; struct ureg_src scale; struct ureg_dst t_tex; struct ureg_dst o_vpos, o_addr[2]; @@ -152,7 +152,6 @@ create_mismatch_vert_shader(struct vl_idct *idct) if (!shader) return NULL; - vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_tex = ureg_DECL_temporary(shader); diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index add367e3ac6..81a05b539f3 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -103,7 +103,7 @@ create_ref_vert_shader(struct vl_mc *r) { struct ureg_program *shader; struct ureg_src mv_scale; - struct ureg_src vrect, vmv[2]; + struct ureg_src vmv[2]; struct ureg_dst t_vpos; struct ureg_dst o_vpos, o_vmv[2]; unsigned i; @@ -112,7 +112,6 @@ create_ref_vert_shader(struct vl_mc *r) if (!shader) return NULL; - vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vmv[0] = ureg_DECL_vs_input(shader, 
VS_I_MV_TOP); vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM); @@ -121,6 +120,8 @@ create_ref_vert_shader(struct vl_mc *r) (float)MACROBLOCK_HEIGHT / r->buffer_height) ); + /* XXX The position is not written, which may lead to undefined rendering. + * XXX This is a serious bug. */ o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); From 1ad3ba4ad954b86751bd5b6ad0a431920bff9958 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 8 Jul 2011 10:34:38 -0700 Subject: [PATCH 028/600] glsl: Make prototype_string publicly available Also clarify the documentation for one of the parameters. Reviewed-by: Paul Berry Reviewed-by: Eric Anholt Reviewed-by: Kenneth Graunke --- src/glsl/ast_function.cpp | 6 ++++-- src/glsl/ir.h | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 60a2c617f70..bdb73f48706 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -62,8 +62,10 @@ process_parameters(exec_list *instructions, exec_list *actual_parameters, * * \param return_type Return type of the function. May be \c NULL. * \param name Name of the function. - * \param parameters Parameter list for the function. This may be either a - * formal or actual parameter list. Only the type is used. + * \param parameters List of \c ir_instruction nodes representing the + * parameter list for the function. This may be either a + * formal (\c ir_variable) or actual (\c ir_rvalue) + * parameter list. Only the type is used. * * \return * A ralloced string representing the prototype of the function. 
diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 80ad3dd295e..9f277380c7d 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1669,4 +1669,8 @@ ir_has_call(ir_instruction *ir); extern void do_set_program_inouts(exec_list *instructions, struct gl_program *prog); +extern char * +prototype_string(const glsl_type *return_type, const char *name, + exec_list *parameters); + #endif /* IR_H */ From 02c5ae1b3fef75d5c0a715313a69e6b95ebd5b95 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 11 Jul 2011 10:46:01 -0700 Subject: [PATCH 029/600] glsl: Reject shaders that contain static recursion The GLSL 1.20 and later specs say: "Recursion is not allowed, not even statically. Static recursion is present if the static function call graph of the program contains cycles." Recursion is detected and rejected both at compile-time and at link-time. The compile-time check happens to detect some cases that may be removed by various optimization passes. The spec doesn't seem to allow this, but other vendors (e.g., NVIDIA) appear to only check at link-time after all optimizations.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=33885 Reviewed-by: Paul Berry Reviewed-by: Eric Anholt Reviewed-by: Kenneth Graunke --- src/glsl/Makefile | 1 + src/glsl/ast_to_hir.cpp | 2 + src/glsl/ir.h | 26 ++ src/glsl/ir_function_detect_recursion.cpp | 371 ++++++++++++++++++++++ src/glsl/linker.cpp | 4 + 5 files changed, 404 insertions(+) create mode 100644 src/glsl/ir_function_detect_recursion.cpp diff --git a/src/glsl/Makefile b/src/glsl/Makefile index e0776c1b55d..d1422c2a4d6 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -39,6 +39,7 @@ CXX_SOURCES = \ ir.cpp \ ir_expression_flattening.cpp \ ir_function_can_inline.cpp \ + ir_function_detect_recursion.cpp \ ir_function.cpp \ ir_hierarchical_visitor.cpp \ ir_hv_accept.cpp \ diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 2312c297c40..c0524bf0bcc 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -83,6 +83,8 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) foreach_list_typed (ast_node, ast, link, & state->translation_unit) ast->hir(instructions, state); + + detect_recursion_unlinked(state, instructions); } diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 9f277380c7d..50a9d6e1991 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1635,6 +1635,32 @@ visit_exec_list(exec_list *list, ir_visitor *visitor); */ void validate_ir_tree(exec_list *instructions); +struct _mesa_glsl_parse_state; +struct gl_shader_program; + +/** + * Detect whether an unlinked shader contains static recursion + * + * If the list of instructions is determined to contain static recursion, + * \c _mesa_glsl_error will be called to emit error messages for each function + * that is in the recursion cycle. 
+ */ +void +detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, + exec_list *instructions); + +/** + * Detect whether a linked shader contains static recursion + * + * If the list of instructions is determined to contain static recursion, + * \c linker_error_printf will be called to emit error messages for each function + * that is in the recursion cycle. In addition, + * \c gl_shader_program::LinkStatus will be set to false. + */ +void +detect_recursion_linked(struct gl_shader_program *prog, + exec_list *instructions); + /** * Make a clone of each IR instruction in a list * diff --git a/src/glsl/ir_function_detect_recursion.cpp b/src/glsl/ir_function_detect_recursion.cpp new file mode 100644 index 00000000000..44a1cd0b950 --- /dev/null +++ b/src/glsl/ir_function_detect_recursion.cpp @@ -0,0 +1,371 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE.
+ */ + +/** + * \file ir_function_detect_recursion.cpp + * Determine whether a shader contains static recursion. + * + * Consider the (possibly disjoint) graph of function calls in a shader. If a + * program contains recursion, this graph will contain a cycle. If a function + * is part of a cycle, it will have a caller and it will have a callee (it + * calls another function). + * + * To detect recursion, the function call graph is constructed. The graph is + * repeatedly reduced by removing any function that either has no callees + * (leaf functions) or has no caller. Eventually the only functions that + * remain will be the functions in the cycles. + * + * The GLSL spec is a bit wishy-washy about recursion. + * + * From page 39 (page 45 of the PDF) of the GLSL 1.10 spec: + * + * "Behavior is undefined if recursion is used. Recursion means having any + * function appearing more than once at any one time in the run-time stack + * of function calls. That is, a function may not call itself either + * directly or indirectly. Compilers may give diagnostic messages when + * this is detectable at compile time, but not all such cases can be + * detected at compile time." + * + * From page 79 (page 85 of the PDF): + * + * "22) Should recursion be supported? + * + * DISCUSSION: Probably not necessary, but another example of limiting + * the language based on how it would directly map to hardware. One + * thought is that recursion would benefit ray tracing shaders. On the + * other hand, many recursion operations can also be implemented with the + * user managing the recursion through arrays. RenderMan doesn't support + * recursion. This could be added at a later date, if it proved to be + * necessary. + * + * RESOLVED on September 10, 2002: Implementations are not required to + * support recursion. + * + * CLOSED on September 10, 2002." 
+ * + * From page 79 (page 85 of the PDF): + * + * "56) Is it an error for an implementation to support recursion if the + * specification says recursion is not supported? + * + * ADDED on September 10, 2002. + * + * DISCUSSION: This issues is related to Issue (22). If we say that + * recursion (or some other piece of functionality) is not supported, is + * it an error for an implementation to support it? Perhaps the + * specification should remain silent on these kind of things so that they + * could be gracefully added later as an extension or as part of the + * standard. + * + * RESOLUTION: Languages, in general, have programs that are not + * well-formed in ways a compiler cannot detect. Portability is only + * ensured for well-formed programs. Detecting recursion is an example of + * this. The language will say a well-formed program may not recurse, but + * compilers are not forced to detect that recursion may happen. + * + * CLOSED: November 29, 2002." + * + * In GLSL 1.10 the behavior of recursion is undefined. Compilers don't have + * to reject shaders (at compile-time or link-time) that contain recursion. + * Instead they could work, or crash, or kill a kitten. + * + * From page 44 (page 50 of the PDF) of the GLSL 1.20 spec: + * + * "Recursion is not allowed, not even statically. Static recursion is + * present if the static function call graph of the program contains + * cycles." + * + * This language clears things up a bit, but it still leaves a lot of + * questions unanswered. + * + * - Is the error generated at compile-time or link-time? + * + * - Is it an error to have a recursive function that is never statically + * called by main or any function called directly or indirectly by main? + * Technically speaking, such a function is not in the "static function + * call graph of the program" at all.
+ * + * \bug + * If a shader has multiple cycles, this algorithm may erroneously complain + * about functions that aren't in any cycle, but are in the part of the call + * tree that connects them. For example, if the call graph consists of a + * cycle between A and B, and a cycle between D and E, and B also calls C + * which calls D, then this algorithm will report C as a function which "has + * static recursion" even though it is not part of any cycle. + * + * A better algorithm for cycle detection that doesn't have this drawback can + * be found here: + * + * http://en.wikipedia.org/wiki/Tarjan%E2%80%99s_strongly_connected_components_algorithm + * + * \author Ian Romanick + */ +#include "main/core.h" +#include "ir.h" +#include "glsl_parser_extras.h" +#include "linker.h" +#include "program/hash_table.h" + +struct call_node : public exec_node { + class function *func; +}; + +class function { +public: + function(ir_function_signature *sig) + : sig(sig) + { + /* empty */ + } + + + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + /* If the user *does* call delete, that's OK, we will just + * ralloc_free in that case. */ + static void operator delete(void *node) + { + ralloc_free(node); + } + + ir_function_signature *sig; + + /** List of functions called by this function. */ + exec_list callees; + + /** List of functions that call this function. 
*/ + exec_list callers; +}; + +class has_recursion_visitor : public ir_hierarchical_visitor { +public: + has_recursion_visitor() + : current(NULL) + { + this->mem_ctx = ralloc_context(NULL); + this->function_hash = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + } + + ~has_recursion_visitor() + { + hash_table_dtor(this->function_hash); + ralloc_free(this->mem_ctx); + } + + function *get_function(ir_function_signature *sig) + { + function *f = (function *) hash_table_find(this->function_hash, sig); + if (f == NULL) { + f = new(mem_ctx) function(sig); + hash_table_insert(this->function_hash, f, sig); + } + + return f; + } + + virtual ir_visitor_status visit_enter(ir_function_signature *sig) + { + this->current = this->get_function(sig); + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_function_signature *sig) + { + (void) sig; + this->current = NULL; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_call *call) + { + /* At global scope this->current will be NULL. Since there is no way to + * call global scope, it can never be part of a cycle. Don't bother + * adding calls from global scope to the graph. + */ + if (this->current == NULL) + return visit_continue; + + function *const target = this->get_function(call->get_callee()); + + /* Create a link from the caller to the callee. + */ + call_node *node = new(mem_ctx) call_node; + node->func = target; + this->current->callees.push_tail(node); + + /* Create a link from the callee to the caller. + */ + node = new(mem_ctx) call_node; + node->func = this->current; + target->callers.push_tail(node); + return visit_continue; + } + + function *current; + struct hash_table *function_hash; + void *mem_ctx; + bool progress; +}; + +static void +destroy_links(exec_list *list, function *f) +{ + foreach_list_safe(node, list) { + struct call_node *n = (struct call_node *) node; + + /* If this is the right function, remove it. 
Note that the loop cannot + * terminate now. There can be multiple links to a function if it is + * either called multiple times or calls multiple times. + */ + if (n->func == f) + n->remove(); + } +} + + +/** + * Remove a function if it has either no in or no out links + */ +static void +remove_unlinked_functions(const void *key, void *data, void *closure) +{ + has_recursion_visitor *visitor = (has_recursion_visitor *) closure; + function *f = (function *) data; + + if (f->callers.is_empty() || f->callees.is_empty()) { + while (!f->callers.is_empty()) { + struct call_node *n = (struct call_node *) f->callers.pop_head(); + destroy_links(& n->func->callees, f); + } + + while (!f->callees.is_empty()) { + struct call_node *n = (struct call_node *) f->callees.pop_head(); + destroy_links(& n->func->callers, f); + } + + hash_table_remove(visitor->function_hash, key); + visitor->progress = true; + } +} + + +static void +emit_errors_unlinked(const void *key, void *data, void *closure) +{ + struct _mesa_glsl_parse_state *state = + (struct _mesa_glsl_parse_state *) closure; + function *f = (function *) data; + YYLTYPE loc; + + char *proto = prototype_string(f->sig->return_type, + f->sig->function_name(), + &f->sig->parameters); + + memset(&loc, 0, sizeof(loc)); + _mesa_glsl_error(&loc, state, + "function `%s' has static recursion.", + proto); + ralloc_free(proto); +} + + +static void +emit_errors_linked(const void *key, void *data, void *closure) +{ + struct gl_shader_program *prog = + (struct gl_shader_program *) closure; + function *f = (function *) data; + + char *proto = prototype_string(f->sig->return_type, + f->sig->function_name(), + &f->sig->parameters); + + linker_error_printf(prog, + "function `%s' has static recursion.\n", + proto); + ralloc_free(proto); + prog->LinkStatus = false; +} + + +void +detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, + exec_list *instructions) +{ + has_recursion_visitor v; + + /* Collect all of the information about which 
functions call which other + * functions. + */ + v.run(instructions); + + /* Remove from the set all of the functions that either have no caller or + * call no other functions. Repeat until no functions are removed. + */ + do { + v.progress = false; + hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v); + } while (v.progress); + + + /* At this point any functions still in the hash must be part of a cycle. + */ + hash_table_call_foreach(v.function_hash, emit_errors_unlinked, state); +} + + +void +detect_recursion_linked(struct gl_shader_program *prog, + exec_list *instructions) +{ + has_recursion_visitor v; + + /* Collect all of the information about which functions call which other + * functions. + */ + v.run(instructions); + + /* Remove from the set all of the functions that either have no caller or + * call no other functions. Repeat until no functions are removed. + */ + do { + v.progress = false; + hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v); + } while (v.progress); + + + /* At this point any functions still in the hash must be part of a cycle. + */ + hash_table_call_foreach(v.function_hash, emit_errors_linked, prog); +} diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 5ec08446d16..fe570b6cc45 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1702,6 +1702,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->_LinkedShaders[i] == NULL) continue; + detect_recursion_linked(prog, prog->_LinkedShaders[i]->ir); + if (!prog->LinkStatus) + goto done; + while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, 32)) ; } From 76bccaff0c54aed10ffbc7c7dc744f1708921409 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 20 Jul 2011 20:16:27 -0700 Subject: [PATCH 030/600] glsl: Add ir_function_detect_recursion.cpp to SConscript. 
--- src/glsl/SConscript | 1 + 1 file changed, 1 insertion(+) diff --git a/src/glsl/SConscript b/src/glsl/SConscript index 1441cc74bd8..ea104abb823 100644 --- a/src/glsl/SConscript +++ b/src/glsl/SConscript @@ -50,6 +50,7 @@ glsl_sources = [ 'ir.cpp', 'ir_expression_flattening.cpp', 'ir_function_can_inline.cpp', + 'ir_function_detect_recursion.cpp', 'ir_function.cpp', 'ir_hierarchical_visitor.cpp', 'ir_hv_accept.cpp', From ea316c5e060cbd92b34e0d794c0707d4ca79e6e8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 21 Jul 2011 10:39:41 +0200 Subject: [PATCH 031/600] nouveau: hook up video decoding with nouveau_context This doesn't include nvfx since its context struct is not derived from common nouveau_context (yet). --- src/gallium/drivers/nouveau/Makefile | 3 +- src/gallium/drivers/nouveau/nouveau_context.h | 3 ++ src/gallium/drivers/nouveau/nouveau_screen.h | 1 + src/gallium/drivers/nouveau/nouveau_video.c | 39 +++++++++++++++++++ src/gallium/drivers/nv50/nv50_context.c | 2 + src/gallium/drivers/nv50/nv50_screen.c | 2 + src/gallium/drivers/nvc0/nvc0_context.c | 2 + src/gallium/drivers/nvc0/nvc0_screen.c | 5 +++ 8 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 src/gallium/drivers/nouveau/nouveau_video.c diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index 3210d1ff77b..aae6d9889bb 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -10,6 +10,7 @@ LIBRARY_INCLUDES = \ C_SOURCES = nouveau_screen.c \ nouveau_fence.c \ nouveau_mm.c \ - nouveau_buffer.c + nouveau_buffer.c \ + nouveau_video.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h index 696e0d3f24e..19bf7c84ac7 100644 --- a/src/gallium/drivers/nouveau/nouveau_context.h +++ b/src/gallium/drivers/nouveau/nouveau_context.h @@ -23,4 +23,7 @@ nouveau_context(struct pipe_context *pipe) return (struct nouveau_context 
*)pipe; } +void +nouveau_context_init_vdec(struct nouveau_context *); + #endif diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index d910809a0ec..cf291c6c595 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -76,6 +76,7 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *); void nouveau_screen_fini(struct nouveau_screen *); +void nouveau_screen_init_vdec(struct nouveau_screen *); #ifndef NOUVEAU_NVC0 diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c new file mode 100644 index 00000000000..32f038dae61 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_video.c @@ -0,0 +1,39 @@ + +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" + +#include "nouveau/nouveau_screen.h" +#include "nouveau/nouveau_context.h" + +static int +nouveau_screen_get_video_param(struct pipe_screen *pscreen, + enum pipe_video_profile profile, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return vl_profile_supported(pscreen, profile); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return vl_video_buffer_max_size(pscreen); + default: + debug_printf("unknown video param: %d\n", param); + return 0; + } +} + +void +nouveau_screen_init_vdec(struct nouveau_screen *screen) +{ + screen->base.get_video_param = nouveau_screen_get_video_param; + screen->base.is_video_format_supported = vl_video_buffer_is_format_supported; +} + +void +nouveau_context_init_vdec(struct nouveau_context *nv) +{ + nv->pipe.create_video_decoder = vl_create_decoder; + nv->pipe.create_video_buffer = vl_video_buffer_create; +} diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index ac3e361a446..0d464063b5b 100644 --- 
a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -149,6 +149,8 @@ nv50_create(struct pipe_screen *pscreen, void *priv) assert(nv50->draw); draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); + nouveau_context_init_vdec(&nv50->base); + return pipe; } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index a697ff5ecf7..4139b85a9ae 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -315,6 +315,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_init_resource_functions(pscreen); + nouveau_screen_init_vdec(&screen->base); + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &screen->fence.bo); if (ret) diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 983db23eedb..360afbb943e 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -150,6 +150,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) assert(nvc0->draw); draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0)); + nouveau_context_init_vdec(&nvc0->base); + return pipe; } diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 605a0b04018..5d1b324dbff 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -24,6 +24,9 @@ #include "util/u_format_s3tc.h" #include "pipe/p_screen.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" + #include "nvc0_context.h" #include "nvc0_screen.h" @@ -373,6 +376,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nvc0_screen_init_resource_functions(pscreen); + nouveau_screen_init_vdec(&screen->base); + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &screen->fence.bo); if (ret) From 000896c0bb99f356e52854608a29476d3ade387c Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 19 Jul 2011 03:05:07 +0200 Subject: [PATCH 032/600] mesa: GLES2 should return different error enums for invalid fbo queries ES 2.0.25 page 127 says: If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then querying any other pname will generate INVALID_ENUM. See also: b9e9df78a03edb35472c2e231aef4747e09db792 NOTE: This is a candidate for the 7.10 and 7.11 branches. Reviewed-by: Ian Romanick --- src/mesa/main/fbobject.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 84969360d92..82eb7fb718d 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2134,10 +2134,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, { const struct gl_renderbuffer_attachment *att; struct gl_framebuffer *buffer; + GLenum err; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); + /* The error differs in GL andd GLES. */ + err = ctx->API == API_OPENGL ? 
GL_INVALID_OPERATION : GL_INVALID_ENUM; + buffer = get_framebuffer_target(ctx, target); if (!buffer) { _mesa_error(ctx, GL_INVALID_ENUM, @@ -2188,7 +2192,12 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } else { assert(att->Type == GL_NONE); - *params = 0; + if (ctx->API == API_OPENGL) { + *params = 0; + } else { + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetFramebufferAttachmentParameterivEXT(pname)"); + } } return; case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT: @@ -2196,7 +2205,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, *params = att->TextureLevel; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2214,7 +2223,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2232,7 +2241,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2246,7 +2255,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2267,7 +2276,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, return; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2301,7 +2310,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum 
attachment, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Texture) { From afc160e1c8f87d0a76e41f1100d528d8ab82ecc4 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 20 Jul 2011 18:52:33 +0800 Subject: [PATCH 033/600] u_vbuf_mgr: restore buffer offsets u_vbuf_upload_buffers modifies the buffer offsets. If they are not restored, and any of the vertex formats is not supported natively, the next u_vbuf_mgr_draw_begin call will translate the vertex buffers with incorrect buffer offsets. --- src/gallium/auxiliary/util/u_vbuf_mgr.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c index 374fc336b83..19eb689cfb2 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.c +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -79,6 +79,8 @@ struct u_vbuf_mgr_priv { void *saved_ve, *fallback_ve; boolean ve_binding_lock; + unsigned saved_buffer_offset[PIPE_MAX_ATTRIBS]; + boolean any_user_vbs; boolean incompatible_vb_layout; }; @@ -488,6 +490,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb, pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, vb->buffer); pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL); + mgr->saved_buffer_offset[i] = vb->buffer_offset; if (!vb->buffer) { continue; @@ -647,6 +650,13 @@ u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb, void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgrb) { struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + unsigned i; + + /* buffer offsets were modified in u_vbuf_upload_buffers */ + if (mgr->any_user_vbs) { + for (i = 0; i < mgr->b.nr_vertex_buffers; i++) + mgr->b.vertex_buffer[i].buffer_offset = mgr->saved_buffer_offset[i]; + } if (mgr->fallback_ve) { u_vbuf_translate_end(mgr); From f16d97feaa394826456e27250d5dfdb24df5cd57 Mon Sep 17 
00:00:00 2001 From: Brian Paul Date: Thu, 21 Jul 2011 09:55:22 -0600 Subject: [PATCH 034/600] softpipe: rename a function --- src/gallium/drivers/softpipe/sp_quad_pipe.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 2cfd02a22c6..addd47e2920 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -30,9 +30,9 @@ #include "sp_state.h" #include "pipe/p_shader_tokens.h" + static void -sp_push_quad_first( struct softpipe_context *sp, - struct quad_stage *quad ) +insert_stage_at_head(struct softpipe_context *sp, struct quad_stage *quad) { quad->next = sp->quad.first; sp->quad.first = quad; @@ -53,17 +53,17 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->quad.first = sp->quad.blend; if (early_depth_test) { - sp_push_quad_first( sp, sp->quad.shade ); - sp_push_quad_first( sp, sp->quad.depth_test ); + insert_stage_at_head( sp, sp->quad.shade ); + insert_stage_at_head( sp, sp->quad.depth_test ); } else { - sp_push_quad_first( sp, sp->quad.depth_test ); - sp_push_quad_first( sp, sp->quad.shade ); + insert_stage_at_head( sp, sp->quad.depth_test ); + insert_stage_at_head( sp, sp->quad.shade ); } #if !DO_PSTIPPLE_IN_DRAW_MODULE if (sp->rasterizer->poly_stipple_enable) - sp_push_quad_first( sp, sp->quad.pstipple ); + insert_stage_at_head( sp, sp->quad.pstipple ); #endif } From 9c1319d31d85f1e5bc61eef1bc963584623d0d51 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jul 2011 09:55:22 -0600 Subject: [PATCH 035/600] softpipe: remove obsolete comment --- src/gallium/drivers/softpipe/sp_fs_exec.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 346e1b402ba..85e7141486a 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -193,10 +193,6 @@ 
softpipe_create_fs_exec(struct softpipe_context *softpipe, { struct sp_exec_fragment_shader *shader; - /* Decide whether we'll be codegenerating this shader and if so do - * that now. - */ - shader = CALLOC_STRUCT(sp_exec_fragment_shader); if (!shader) return NULL; From 2253906da3c506bb5378a8f2fa203ed0c9021171 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jul 2011 09:55:22 -0600 Subject: [PATCH 036/600] tgsi: add info fields for fragcoord origin, center, etc --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 38 +++++++++++++++++++------- src/gallium/auxiliary/tgsi/tgsi_scan.h | 3 ++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 83c6ac75e54..f165f8240e6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -200,19 +200,20 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_max[file] = MAX2(info->file_max[file], (int)reg); } break; + case TGSI_TOKEN_TYPE_PROPERTY: - { - const struct tgsi_full_property *fullprop - = &parse.FullToken.FullProperty; + { + const struct tgsi_full_property *fullprop + = &parse.FullToken.FullProperty; - info->properties[info->num_properties].name = - fullprop->Property.PropertyName; - memcpy(info->properties[info->num_properties].data, - fullprop->u, 8 * sizeof(unsigned));; + info->properties[info->num_properties].name = + fullprop->Property.PropertyName; + memcpy(info->properties[info->num_properties].data, + fullprop->u, 8 * sizeof(unsigned));; - ++info->num_properties; - } - break; + ++info->num_properties; + } + break; default: assert( 0 ); @@ -222,6 +223,23 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || info->opcode_count[TGSI_OPCODE_KILP]); + /* extract simple properties */ + for (i = 0; i < info->num_properties; ++i) { + switch (info->properties[i].name) { + case TGSI_PROPERTY_FS_COORD_ORIGIN: + 
info->origin_lower_left = info->properties[i].data[0]; + break; + case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + info->pixel_center_integer = info->properties[i].data[0]; + break; + case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: + info->color0_writes_all_cbufs = info->properties[i].data[0]; + break; + default: + ; + } + } + tgsi_parse_free (&parse); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 53ab3d509dd..d6e593b3968 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -68,6 +68,9 @@ struct tgsi_shader_info boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ boolean uses_instanceid; + boolean origin_lower_left; + boolean pixel_center_integer; + boolean color0_writes_all_cbufs; /** * Bitmask indicating which register files are accessed with From 4736c0ba8670637970a971da6ec83cf1d2620a32 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jul 2011 09:55:22 -0600 Subject: [PATCH 037/600] softpipe: use tgsi_shader_info fields for fragcoord origin, center, etc. 
--- src/gallium/drivers/softpipe/sp_quad_blend.c | 2 +- src/gallium/drivers/softpipe/sp_setup.c | 8 ++++---- src/gallium/drivers/softpipe/sp_state.h | 3 --- src/gallium/drivers/softpipe/sp_state_shader.c | 9 --------- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 76cfc0bf51c..4a4e0022110 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -797,7 +797,7 @@ blend_fallback(struct quad_stage *qs, unsigned cbuf; boolean write_all; - write_all = softpipe->fs->color0_writes_all_cbufs; + write_all = softpipe->fs->info.color0_writes_all_cbufs; for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 0ce28f4c6ee..48f29f87661 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -570,15 +570,15 @@ setup_fragcoord_coeff(struct setup_context *setup, uint slot) { struct sp_fragment_shader* spfs = setup->softpipe->fs; /*X*/ - setup->coef[slot].a0[0] = spfs->pixel_center_integer ? 0.0 : 0.5; + setup->coef[slot].a0[0] = spfs->info.pixel_center_integer ? 0.0 : 0.5; setup->coef[slot].dadx[0] = 1.0; setup->coef[slot].dady[0] = 0.0; /*Y*/ setup->coef[slot].a0[1] = - (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) - + (spfs->pixel_center_integer ? 0.0 : 0.5); + (spfs->info.origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) + + (spfs->info.pixel_center_integer ? 0.0 : 0.5); setup->coef[slot].dadx[1] = 0.0; - setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0; + setup->coef[slot].dady[1] = spfs->info.origin_lower_left ? 
-1.0 : 1.0; /*Z*/ setup->coef[slot].a0[2] = setup->posCoef.a0[2]; setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index bb19f8cff20..6c14dd132e9 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -72,9 +72,6 @@ struct sp_fragment_shader { struct draw_fragment_shader *draw_shader; - boolean origin_lower_left; /**< fragment shader uses lower left position origin? */ - boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */ - boolean color0_writes_all_cbufs; /**< fragment shader writes color0 to all bound cbufs */ void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers); diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c index 3dec5de3cc4..80af2578839 100644 --- a/src/gallium/drivers/softpipe/sp_state_shader.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -73,15 +73,6 @@ softpipe_create_fs_state(struct pipe_context *pipe, /* get/save the summary info for this shader */ tgsi_scan_shader(templ->tokens, &state->info); - for (i = 0; i < state->info.num_properties; ++i) { - if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN) - state->origin_lower_left = state->info.properties[i].data[0]; - else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER) - state->pixel_center_integer = state->info.properties[i].data[0]; - else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) - state->color0_writes_all_cbufs = state->info.properties[i].data[0]; - } - return state; } From 3dde6be908d827f4d6d54e0968ae83c2c4dfa87c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jul 2011 09:55:22 -0600 Subject: [PATCH 038/600] util: assorted updates to polygon stipple helper --- src/gallium/auxiliary/util/u_pstipple.c | 
43 +++++++++++++++++++------ 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c index f79a6938d1d..ac0df8c1a9c 100644 --- a/src/gallium/auxiliary/util/u_pstipple.c +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -52,6 +52,7 @@ #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" /** Approx number of new tokens for instructions in pstip_transform_inst() */ #define NUM_NEW_TOKENS 50 @@ -175,6 +176,7 @@ util_pstipple_create_sampler(struct pipe_context *pipe) */ struct pstip_transform_context { struct tgsi_transform_context base; + struct tgsi_shader_info info; uint tempsUsed; /**< bitmask */ int wincoordInput; int maxInput; @@ -183,12 +185,13 @@ struct pstip_transform_context { int texTemp; /**< temp registers */ int numImmed; boolean firstInstruction; + uint coordOrigin; }; /** * TGSI declaration transform callback. - * Look for a free sampler, a free input attrib, and two free temp regs. + * Track samplers used, temps used, inputs used. */ static void pstip_transform_decl(struct tgsi_transform_context *ctx, @@ -197,10 +200,11 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + /* XXX we can use tgsi_shader_info instead of some of this */ + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->Range.First; - i <= decl->Range.Last; i++) { + for (i = decl->Range.First; i <= decl->Range.Last; i++) { pctx->samplersUsed |= 1 << i; } } @@ -211,8 +215,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->Range.First; - i <= decl->Range.Last; i++) { + for (i = decl->Range.First; i <= decl->Range.Last; i++) { pctx->tempsUsed |= (1 << i); } } @@ -243,8 +246,16 @@ free_bit(uint bitfield) /** * TGSI instruction transform callback. 
- * Replace writes to result.color w/ a temp reg. - * Upon END instruction, insert texture sampling code for antialiasing. + * Before the first instruction, insert our new code to sample the + * stipple texture (using the fragment coord register) then kill the + * fragment if the stipple texture bit is off. + * + * Insert: + * declare new registers + * MUL texTemp, INPUT[wincoord], 1/32; + * TEX texTemp, texTemp, sampler; + * KIL -texTemp; # if -texTemp < 0, KILL fragment + * [...original code...] */ static void pstip_transform_inst(struct tgsi_transform_context *ctx, @@ -261,7 +272,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, uint i; int wincoordInput; - /* find free sampler */ + /* find free texture sampler */ pctx->freeSampler = free_bit(pctx->samplersUsed); if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; @@ -271,7 +282,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, else wincoordInput = pctx->wincoordInput; - /* find one free temp reg */ + /* find one free temp register */ for (i = 0; i < 32; i++) { if ((pctx->tempsUsed & (1 << i)) == 0) { /* found a free temp */ @@ -397,6 +408,7 @@ util_pstipple_create_fragment_shader(struct pipe_context *pipe, struct pipe_shader_state *new_fs; struct pstip_transform_context transform; const uint newLen = tgsi_num_tokens(fs->tokens) + NUM_NEW_TOKENS; + unsigned i; new_fs = MALLOC(sizeof(*new_fs)); if (!new_fs) @@ -408,22 +420,33 @@ util_pstipple_create_fragment_shader(struct pipe_context *pipe, return NULL; } + /* Setup shader transformation info/context. 
+ */ memset(&transform, 0, sizeof(transform)); transform.wincoordInput = -1; transform.maxInput = -1; transform.texTemp = -1; transform.firstInstruction = TRUE; + transform.coordOrigin = TGSI_FS_COORD_ORIGIN_UPPER_LEFT; transform.base.transform_instruction = pstip_transform_inst; transform.base.transform_declaration = pstip_transform_decl; transform.base.transform_immediate = pstip_transform_immed; + tgsi_scan_shader(fs->tokens, &transform.info); + + /* find fragment coordinate origin property */ + for (i = 0; i < transform.info.num_properties; i++) { + if (transform.info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN) + transform.coordOrigin = transform.info.properties[i].data[0]; + } + tgsi_transform_shader(fs->tokens, (struct tgsi_token *) new_fs->tokens, newLen, &transform.base); #if 0 /* DEBUG */ tgsi_dump(fs->tokens, 0); - tgsi_dump(pstip_fs.tokens, 0); + tgsi_dump(new_fs->tokens, 0); #endif assert(transform.freeSampler < PIPE_MAX_SAMPLERS); From c534f11164bbecf25eb2b1e697f9511eceb0c86f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jul 2011 09:55:22 -0600 Subject: [PATCH 039/600] softpipe: implement fragment shader variants We'll need shader variants to accommodate the new polygon stipple utility.
--- src/gallium/drivers/softpipe/sp_context.h | 1 + src/gallium/drivers/softpipe/sp_fs.h | 16 ++- src/gallium/drivers/softpipe/sp_fs_exec.c | 36 +++--- src/gallium/drivers/softpipe/sp_fs_sse.c | 25 ++-- src/gallium/drivers/softpipe/sp_quad_blend.c | 2 +- .../drivers/softpipe/sp_quad_depth_test.c | 10 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 10 +- src/gallium/drivers/softpipe/sp_quad_pipe.c | 6 +- src/gallium/drivers/softpipe/sp_setup.c | 37 +++--- src/gallium/drivers/softpipe/sp_state.h | 54 ++++++--- .../drivers/softpipe/sp_state_derived.c | 54 +++++++-- .../drivers/softpipe/sp_state_sampler.c | 3 +- .../drivers/softpipe/sp_state_shader.c | 114 +++++++++++++++--- 13 files changed, 251 insertions(+), 117 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index a572ee8cf00..79291abca97 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -64,6 +64,7 @@ struct softpipe_context { struct pipe_depth_stencil_alpha_state *depth_stencil; struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; + struct sp_fragment_shader_variant *fs_variant; struct sp_vertex_shader *vs; struct sp_geometry_shader *gs; struct sp_velems_state *velems; diff --git a/src/gallium/drivers/softpipe/sp_fs.h b/src/gallium/drivers/softpipe/sp_fs.h index 4792ace3a33..d46d7d5a657 100644 --- a/src/gallium/drivers/softpipe/sp_fs.h +++ b/src/gallium/drivers/softpipe/sp_fs.h @@ -31,17 +31,15 @@ #ifndef SP_FS_H #define SP_FS_H -struct sp_fragment_shader * -softpipe_create_fs_exec(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); + +struct sp_fragment_shader_variant * 
+softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); -struct sp_fragment_shader * -softpipe_create_fs_llvm(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); struct tgsi_interp_coef; struct tgsi_exec_vector; diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 85e7141486a..779b8c4995c 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -42,25 +42,25 @@ /** - * Subclass of sp_fragment_shader + * Subclass of sp_fragment_shader_variant */ struct sp_exec_fragment_shader { - struct sp_fragment_shader base; + struct sp_fragment_shader_variant base; /* No other members for now */ }; /** cast wrapper */ static INLINE struct sp_exec_fragment_shader * -sp_exec_fragment_shader(const struct sp_fragment_shader *base) +sp_exec_fragment_shader(const struct sp_fragment_shader_variant *var) { - return (struct sp_exec_fragment_shader *) base; + return (struct sp_exec_fragment_shader *) var; } static void -exec_prepare( const struct sp_fragment_shader *base, +exec_prepare( const struct sp_fragment_shader_variant *var, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers ) { @@ -68,9 +68,9 @@ exec_prepare( const struct sp_fragment_shader *base, * Bind tokens/shader to the interpreter's machine state. * Avoid redundant binding. 
*/ - if (machine->Tokens != base->shader.tokens) { + if (machine->Tokens != var->tokens) { tgsi_exec_machine_bind_shader( machine, - base->shader.tokens, + var->tokens, PIPE_MAX_SAMPLERS, samplers ); } @@ -118,7 +118,7 @@ setup_pos_vector(const struct tgsi_interp_coef *coef, * interface: */ static unsigned -exec_run( const struct sp_fragment_shader *base, +exec_run( const struct sp_fragment_shader_variant *var, struct tgsi_exec_machine *machine, struct quad_header *quad ) { @@ -136,9 +136,9 @@ exec_run( const struct sp_fragment_shader *base, /* store outputs */ { - const ubyte *sem_name = base->info.output_semantic_name; - const ubyte *sem_index = base->info.output_semantic_index; - const uint n = base->info.num_outputs; + const ubyte *sem_name = var->info.output_semantic_name; + const ubyte *sem_index = var->info.output_semantic_index; + const uint n = var->info.num_outputs; uint i; for (i = 0; i < n; i++) { switch (sem_name[i]) { @@ -180,16 +180,16 @@ exec_run( const struct sp_fragment_shader *base, static void -exec_delete( struct sp_fragment_shader *base ) +exec_delete( struct sp_fragment_shader_variant *var ) { - FREE((void *) base->shader.tokens); - FREE(base); + FREE( (void *) var->tokens ); + FREE(var); } -struct sp_fragment_shader * -softpipe_create_fs_exec(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { struct sp_exec_fragment_shader *shader; @@ -197,8 +197,6 @@ softpipe_create_fs_exec(struct softpipe_context *softpipe, if (!shader) return NULL; - /* we need to keep a local copy of the tokens */ - shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens); shader->base.prepare = exec_prepare; shader->base.run = exec_run; shader->base.delete = exec_delete; diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 5b18cd035e3..c873af125bd 
100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -48,11 +48,11 @@ /** - * Subclass of sp_fragment_shader + * Subclass of sp_fragment_shader_variant */ struct sp_sse_fragment_shader { - struct sp_fragment_shader base; + struct sp_fragment_shader_variant base; struct x86_function sse2_program; tgsi_sse2_fs_function func; float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; @@ -61,14 +61,14 @@ struct sp_sse_fragment_shader /** cast wrapper */ static INLINE struct sp_sse_fragment_shader * -sp_sse_fragment_shader(const struct sp_fragment_shader *base) +sp_sse_fragment_shader(const struct sp_fragment_shader_variant *base) { return (struct sp_sse_fragment_shader *) base; } static void -fs_sse_prepare( const struct sp_fragment_shader *base, +fs_sse_prepare( const struct sp_fragment_shader_variant *base, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers ) { @@ -119,7 +119,7 @@ setup_pos_vector(const struct tgsi_interp_coef *coef, * TODO: process >1 quad at a time */ static unsigned -fs_sse_run( const struct sp_fragment_shader *base, +fs_sse_run( const struct sp_fragment_shader_variant *base, struct tgsi_exec_machine *machine, struct quad_header *quad ) { @@ -189,7 +189,7 @@ fs_sse_run( const struct sp_fragment_shader *base, static void -fs_sse_delete( struct sp_fragment_shader *base ) +fs_sse_delete( struct sp_fragment_shader_variant *base ) { struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base); @@ -198,9 +198,9 @@ fs_sse_delete( struct sp_fragment_shader *base ) } -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { struct sp_sse_fragment_shader *shader; @@ -226,7 +226,6 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, return NULL; } - shader->base.shader.tokens 
= NULL; /* don't hold reference to templ->tokens */ shader->base.prepare = fs_sse_prepare; shader->base.run = fs_sse_run; shader->base.delete = fs_sse_delete; @@ -239,9 +238,9 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, /* Maybe put this variant in the header file. */ -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { return NULL; } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 4a4e0022110..04bfd14b7c6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -797,7 +797,7 @@ blend_fallback(struct quad_stage *qs, unsigned cbuf; boolean write_all; - write_all = softpipe->fs->info.color0_writes_all_cbufs; + write_all = softpipe->fs_variant->info.color0_writes_all_cbufs; for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 89b2a91fc1f..9e98801810d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -726,9 +726,9 @@ depth_test_quads_fallback(struct quad_stage *qs, unsigned nr) { unsigned i, pass = 0; - const struct sp_fragment_shader *fs = qs->softpipe->fs; - boolean interp_depth = !fs->info.writes_z; - boolean shader_stencil_ref = fs->info.writes_stencil; + const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; + boolean interp_depth = !fsInfo->writes_z; + boolean shader_stencil_ref = fsInfo->writes_stencil; struct depth_data data; data.use_shader_stencil_refs = FALSE; @@ -837,7 +837,9 @@ choose_depth_test(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { - boolean interp_depth = 
!qs->softpipe->fs->info.writes_z; + const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; + + boolean interp_depth = !fsInfo->writes_z; boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 90f4787d599..d74d6d4914e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -74,7 +74,7 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) struct tgsi_exec_machine *machine = softpipe->fs_machine; /* run shader */ - return softpipe->fs->run( softpipe->fs, machine, quad ); + return softpipe->fs_variant->run( softpipe->fs_variant, machine, quad ); } @@ -140,10 +140,10 @@ shade_begin(struct quad_stage *qs) { struct softpipe_context *softpipe = qs->softpipe; - softpipe->fs->prepare( softpipe->fs, - softpipe->fs_machine, - (struct tgsi_sampler **) - softpipe->tgsi.frag_samplers_list ); + softpipe->fs_variant->prepare( softpipe->fs_variant, + softpipe->fs_machine, + (struct tgsi_sampler **) + softpipe->tgsi.frag_samplers_list ); qs->next->begin(qs->next); } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index addd47e2920..a98f8b7bde5 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -46,9 +46,9 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && - !sp->fs->info.uses_kill && - !sp->fs->info.writes_z && - !sp->fs->info.writes_stencil; + !sp->fs_variant->info.uses_kill && + !sp->fs_variant->info.writes_z && + !sp->fs_variant->info.writes_stencil; sp->quad.first = sp->quad.blend; diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 48f29f87661..b82594ca2a5 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ 
b/src/gallium/drivers/softpipe/sp_setup.c @@ -568,17 +568,18 @@ tri_persp_coeff(struct setup_context *setup, static void setup_fragcoord_coeff(struct setup_context *setup, uint slot) { - struct sp_fragment_shader* spfs = setup->softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; + /*X*/ - setup->coef[slot].a0[0] = spfs->info.pixel_center_integer ? 0.0 : 0.5; + setup->coef[slot].a0[0] = fsInfo->pixel_center_integer ? 0.0 : 0.5; setup->coef[slot].dadx[0] = 1.0; setup->coef[slot].dady[0] = 0.0; /*Y*/ setup->coef[slot].a0[1] = - (spfs->info.origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) - + (spfs->info.pixel_center_integer ? 0.0 : 0.5); + (fsInfo->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) + + (fsInfo->pixel_center_integer ? 0.0 : 0.5); setup->coef[slot].dadx[1] = 0.0; - setup->coef[slot].dady[1] = spfs->info.origin_lower_left ? -1.0 : 1.0; + setup->coef[slot].dady[1] = fsInfo->origin_lower_left ? -1.0 : 1.0; /*Z*/ setup->coef[slot].a0[2] = setup->posCoef.a0[2]; setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; @@ -599,7 +600,7 @@ static void setup_tri_coefficients(struct setup_context *setup) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; float v[3]; @@ -618,7 +619,7 @@ setup_tri_coefficients(struct setup_context *setup) /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -632,7 +633,7 @@ setup_tri_coefficients(struct setup_context *setup) tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmid[vertSlot][j], setup->vmax[vertSlot][j], - 
spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); tri_linear_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -642,7 +643,7 @@ setup_tri_coefficients(struct setup_context *setup) tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmid[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); tri_persp_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -654,7 +655,7 @@ setup_tri_coefficients(struct setup_context *setup) assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -939,7 +940,7 @@ setup_line_coefficients(struct setup_context *setup, const float (*v1)[4]) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; float area; @@ -974,7 +975,7 @@ setup_line_coefficients(struct setup_context *setup, /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -987,7 +988,7 @@ setup_line_coefficients(struct setup_context *setup, for (j = 0; j < NUM_CHANNELS; j++) { line_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); line_linear_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -996,7 +997,7 @@ 
setup_line_coefficients(struct setup_context *setup, for (j = 0; j < NUM_CHANNELS; j++) { line_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); line_persp_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -1008,7 +1009,7 @@ setup_line_coefficients(struct setup_context *setup, assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -1188,7 +1189,7 @@ sp_setup_point(struct setup_context *setup, const float (*v0)[4]) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const int sizeAttr = setup->softpipe->psize_slot; const float size = sizeAttr > 0 ? 
v0[sizeAttr][0] @@ -1232,7 +1233,7 @@ sp_setup_point(struct setup_context *setup, const_coeff(setup, &setup->posCoef, 0, 2); const_coeff(setup, &setup->posCoef, 0, 3); - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -1255,7 +1256,7 @@ sp_setup_point(struct setup_context *setup, assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 6c14dd132e9..243f7aab8ba 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -60,31 +60,43 @@ struct tgsi_exec_machine; struct vertex_info; -/** - * Subclass of pipe_shader_state (though it doesn't really need to be). - * - * This is starting to look an awful lot like a quad pipeline stage... 
- */ -struct sp_fragment_shader { - struct pipe_shader_state shader; +struct sp_fragment_shader_variant_key +{ + int foo; /* XXX temporary */ +}; + +struct sp_fragment_shader_variant +{ + const struct tgsi_token *tokens; + struct sp_fragment_shader_variant_key key; struct tgsi_shader_info info; + /* See comments about this elsewhere */ +#if 0 struct draw_fragment_shader *draw_shader; +#endif - void (*prepare)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct tgsi_sampler **samplers); + void (*prepare)(const struct sp_fragment_shader_variant *shader, + struct tgsi_exec_machine *machine, + struct tgsi_sampler **samplers); - /* Run the shader - this interface will get cleaned up in the - * future: - */ - unsigned (*run)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct quad_header *quad ); + unsigned (*run)(const struct sp_fragment_shader_variant *shader, + struct tgsi_exec_machine *machine, + struct quad_header *quad); + + /* Deletes this instance of the object */ + void (*delete)(struct sp_fragment_shader_variant *shader); + + struct sp_fragment_shader_variant *next; +}; - void (*delete)( struct sp_fragment_shader * ); +/** Subclass of pipe_shader_state */ +struct sp_fragment_shader { + struct pipe_shader_state shader; + struct sp_fragment_shader_variant *variants; + struct draw_fragment_shader *draw_shader; }; @@ -138,7 +150,7 @@ softpipe_set_framebuffer_state(struct pipe_context *, const struct pipe_framebuffer_state *); void -softpipe_update_derived( struct softpipe_context *softpipe ); +softpipe_update_derived(struct softpipe_context *softpipe); void softpipe_draw_vbo(struct pipe_context *pipe, @@ -167,4 +179,10 @@ struct vertex_info * softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key); + + 
#endif diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f9590eb0b24..583d0bd9f7b 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -64,7 +64,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &softpipe->fs_variant->info; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; const uint num = draw_num_shader_outputs(softpipe->draw); uint i; @@ -84,11 +84,11 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * from the vertex shader. */ vinfo->num_attribs = 0; - for (i = 0; i < spfs->info.num_inputs; i++) { + for (i = 0; i < fsInfo->num_inputs; i++) { int src; enum interp_mode interp; - switch (spfs->info.input_interpolate[i]) { + switch (fsInfo->input_interpolate[i]) { case TGSI_INTERPOLATE_CONSTANT: interp = INTERP_CONSTANT; break; @@ -103,7 +103,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) interp = INTERP_LINEAR; } - switch (spfs->info.input_semantic_name[i]) { + switch (fsInfo->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: interp = INTERP_POS; break; @@ -117,8 +117,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* this includes texcoords and varying vars */ src = draw_find_shader_output(softpipe->draw, - spfs->info.input_semantic_name[i], - spfs->info.input_semantic_index[i]); + fsInfo->input_semantic_name[i], + fsInfo->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } @@ -241,10 +241,46 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) } +static void +update_fragment_shader(struct softpipe_context *softpipe) +{ + struct sp_fragment_shader_variant_key key; + + memset(&key, 0, sizeof(key)); + + if (softpipe->fs) { + softpipe->fs_variant = 
softpipe_find_fs_variant(softpipe, + softpipe->fs, &key); + } + else { + softpipe->fs_variant = NULL; + } + + /* This would be the logical place to pass the fragment shader + * to the draw module. However, doing this here, during state + * validation, causes problems with the 'draw' module helpers for + * wide/AA/stippled lines. + * In principle, the draw's fragment shader should be per-variant + * but that doesn't work. So we use a single draw fragment shader + * per fragment shader, not per variant. + */ +#if 0 + if (softpipe->fs_variant) { + draw_bind_fragment_shader(softpipe->draw, + softpipe->fs_variant->draw_shader); + } + else { + draw_bind_fragment_shader(softpipe->draw, NULL); + } +#endif +} + + /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ -void softpipe_update_derived( struct softpipe_context *softpipe ) +void +softpipe_update_derived(struct softpipe_context *softpipe) { struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen); @@ -255,6 +291,10 @@ void softpipe_update_derived( struct softpipe_context *softpipe ) softpipe->dirty |= SP_NEW_TEXTURE; } + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_FS)) + update_fragment_shader(softpipe); + if (softpipe->dirty & (SP_NEW_SAMPLER | SP_NEW_TEXTURE | SP_NEW_FS | diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 60331bc4976..16023c990a7 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -373,8 +373,9 @@ softpipe_reset_sampler_variants(struct softpipe_context *softpipe) } } - for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { + for (i = 0; i <= softpipe->fs_variant->info.file_max[TGSI_FILE_SAMPLER]; i++) { if (softpipe->fragment_samplers[i]) { + assert(softpipe->fragment_sampler_views[i]->texture); softpipe->tgsi.frag_samplers_list[i] = get_sampler_variant( i, 
sp_sampler(softpipe->fragment_samplers[i]), diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c index 80af2578839..ddb9a98b45f 100644 --- a/src/gallium/drivers/softpipe/sp_state_shader.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -42,37 +42,91 @@ #include "tgsi/tgsi_parse.h" +/** + * Create a new fragment shader variant. + */ +static struct sp_fragment_shader_variant * +create_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key) +{ + struct sp_fragment_shader_variant *var; + struct pipe_shader_state *curfs = &fs->shader; + + /* codegen, create variant object */ + var = softpipe_create_fs_variant_sse(softpipe, curfs); + if (!var) { + var = softpipe_create_fs_variant_exec(softpipe, curfs); + } + + if (var) { + var->key = *key; + var->tokens = tgsi_dup_tokens(curfs->tokens); + + tgsi_scan_shader(var->tokens, &var->info); + + /* See comments elsewhere about draw fragment shaders */ +#if 0 + /* draw's fs state */ + var->draw_shader = draw_create_fragment_shader(softpipe->draw, + &fs->shader); + if (!var->draw_shader) { + var->delete(var); + FREE((void *) var->tokens); + return NULL; + } +#endif + + /* insert variant into linked list */ + var->next = fs->variants; + fs->variants = var; + } + + return var; +} + + +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *sp, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key) +{ + struct sp_fragment_shader_variant *var; + + for (var = fs->variants; var; var = var->next) { + if (memcmp(&var->key, key, sizeof(*key)) == 0) { + /* found it */ + return var; + } + } + + return create_fs_variant(sp, fs, key); +} + + static void * softpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { struct softpipe_context *softpipe = softpipe_context(pipe); - struct sp_fragment_shader *state; - 
unsigned i; + struct sp_fragment_shader *state = CALLOC_STRUCT(sp_fragment_shader); /* debug */ if (softpipe->dump_fs) tgsi_dump(templ->tokens, 0); - /* codegen */ - state = softpipe_create_fs_sse( softpipe, templ ); - if (!state) { - state = softpipe_create_fs_exec( softpipe, templ ); - } - - if (!state) - return NULL; + /* we need to keep a local copy of the tokens */ + state->shader.tokens = tgsi_dup_tokens(templ->tokens); /* draw's fs state */ - state->draw_shader = draw_create_fragment_shader(softpipe->draw, templ); + state->draw_shader = draw_create_fragment_shader(softpipe->draw, + &state->shader); if (!state->draw_shader) { - state->delete( state ); + FREE((void *) state->shader.tokens); + FREE(state); return NULL; } - /* get/save the summary info for this shader */ - tgsi_scan_shader(templ->tokens, &state->info); - return state; } @@ -81,6 +135,7 @@ static void softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_fragment_shader *state = (struct sp_fragment_shader *) fs; if (softpipe->fs == fs) return; @@ -89,8 +144,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) softpipe->fs = fs; - draw_bind_fragment_shader(softpipe->draw, - (softpipe->fs ? 
softpipe->fs->draw_shader : NULL)); + if (fs == NULL) + softpipe->fs_variant = NULL; + + if (state) + draw_bind_fragment_shader(softpipe->draw, + state->draw_shader); + else + draw_bind_fragment_shader(softpipe->draw, NULL); softpipe->dirty |= SP_NEW_FS; } @@ -101,8 +162,9 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state = fs; + struct sp_fragment_shader_variant *var, *next_var; - assert(fs != softpipe_context(pipe)->fs); + assert(fs != softpipe->fs); if (softpipe->fs_machine->Tokens == state->shader.tokens) { /* unbind the shader from the tgsi executor if we're @@ -111,9 +173,23 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) tgsi_exec_machine_bind_shader(softpipe->fs_machine, NULL, 0, NULL); } + /* delete variants */ + for (var = state->variants; var; var = next_var) { + next_var = var->next; + + assert(var != softpipe->fs_variant); + + /* See comments elsewhere about draw fragment shaders */ +#if 0 + draw_delete_fragment_shader(softpipe->draw, var->draw_shader); +#endif + + var->delete(var); + } + draw_delete_fragment_shader(softpipe->draw, state->draw_shader); - state->delete( state ); + FREE((void *) state->shader.tokens); } From 57aa597b3d5dac0fc59c05557dafec59e14e1019 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jul 2011 09:55:22 -0600 Subject: [PATCH 040/600] softpipe: use the polygon stipple utility module This is an alternative to the draw module's polygon stipple stage. The softpipe implementation here is just a test. The advantage of using the new polygon stipple utility module (with other drivers) is we can avoid software vertex processing in the draw module and get much better performance. Polygon stipple doesn't require special vertex processing like the other draw module stage.
--- src/gallium/drivers/softpipe/sp_clear.c | 2 +- src/gallium/drivers/softpipe/sp_context.c | 14 ++++ src/gallium/drivers/softpipe/sp_context.h | 14 +++- src/gallium/drivers/softpipe/sp_draw_arrays.c | 4 +- src/gallium/drivers/softpipe/sp_quad_pipe.c | 2 +- src/gallium/drivers/softpipe/sp_setup.c | 2 +- src/gallium/drivers/softpipe/sp_state.h | 12 ++- .../drivers/softpipe/sp_state_derived.c | 78 ++++++++++++++++++- .../drivers/softpipe/sp_state_shader.c | 17 +++- 9 files changed, 131 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index ae3f00f3387..22e8a2e5817 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -60,7 +60,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, return; #if 0 - softpipe_update_derived(softpipe); /* not needed?? */ + softpipe_update_derived(softpipe, PIPE_PRIM_TRIANGLES); /* not needed?? */ #endif if (buffers & PIPE_CLEAR_COLOR) { diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index ce22f646228..eabf2dae3fc 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -35,6 +35,7 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pstipple.h" #include "util/u_inlines.h" #include "tgsi/tgsi_exec.h" #include "sp_clear.h" @@ -88,6 +89,14 @@ softpipe_destroy( struct pipe_context *pipe ) struct softpipe_context *softpipe = softpipe_context( pipe ); uint i; +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->pstipple.sampler) + pipe->delete_sampler_state(pipe, softpipe->pstipple.sampler); + + pipe_resource_reference(&softpipe->pstipple.texture, NULL); + pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, NULL); +#endif + if (softpipe->draw) draw_destroy( softpipe->draw ); @@ -341,6 +350,11 @@ softpipe_create_context( struct pipe_screen 
*screen, sp_init_surface_functions(softpipe); +#if DO_PSTIPPLE_IN_HELPER_MODULE + /* create the polgon stipple sampler */ + softpipe->pstipple.sampler = util_pstipple_create_sampler(&softpipe->pipe); +#endif + return &softpipe->pipe; fail: diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 79291abca97..410b0a65792 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -38,8 +38,11 @@ #include "sp_quad_pipe.h" -/** Do polygon stipple in the driver here, or in the draw module? */ -#define DO_PSTIPPLE_IN_DRAW_MODULE 1 +/** Do polygon stipple in the draw module? */ +#define DO_PSTIPPLE_IN_DRAW_MODULE 0 + +/** Do polygon stipple with the util module? */ +#define DO_PSTIPPLE_IN_HELPER_MODULE 1 struct softpipe_vbuf_render; @@ -144,6 +147,13 @@ struct softpipe_context { struct pipe_query *render_cond_query; uint render_cond_mode; + /** Polygon stipple items */ + struct { + struct pipe_resource *texture; + struct pipe_sampler_state *sampler; + struct pipe_sampler_view *sampler_view; + } pstipple; + /** Software quad rendering pipeline */ struct { struct quad_stage *shade; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 01b4ca985d0..69b5b96b4fd 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -64,7 +64,7 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) sp->reduced_api_prim = u_reduced_prim(mode); if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } softpipe_map_transfers(sp); @@ -122,7 +122,7 @@ softpipe_draw_vbo(struct pipe_context *pipe, sp->reduced_api_prim = u_reduced_prim(info->mode); if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } softpipe_map_transfers(sp); diff --git 
a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index a98f8b7bde5..0c4506ae8f4 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -61,7 +61,7 @@ sp_build_quad_pipeline(struct softpipe_context *sp) insert_stage_at_head( sp, sp->quad.shade ); } -#if !DO_PSTIPPLE_IN_DRAW_MODULE +#if !DO_PSTIPPLE_IN_DRAW_MODULE && !DO_PSTIPPLE_IN_HELPER_MODULE if (sp->rasterizer->poly_stipple_enable) insert_stage_at_head( sp, sp->quad.pstipple ); #endif diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index b82594ca2a5..656d001809f 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1397,7 +1397,7 @@ sp_setup_prepare(struct setup_context *setup) struct softpipe_context *sp = setup->softpipe; if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } /* Note: nr_attrs is only used for debugging (vertex printing) */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 243f7aab8ba..ec4c8cf5e4d 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -62,7 +62,7 @@ struct vertex_info; struct sp_fragment_shader_variant_key { - int foo; /* XXX temporary */ + boolean polygon_stipple; }; @@ -72,6 +72,8 @@ struct sp_fragment_shader_variant struct sp_fragment_shader_variant_key key; struct tgsi_shader_info info; + unsigned stipple_sampler_unit; + /* See comments about this elsewhere */ #if 0 struct draw_fragment_shader *draw_shader; @@ -150,7 +152,7 @@ softpipe_set_framebuffer_state(struct pipe_context *, const struct pipe_framebuffer_state *); void -softpipe_update_derived(struct softpipe_context *softpipe); +softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim); void softpipe_draw_vbo(struct pipe_context *pipe, @@ -179,6 +181,12 @@ struct 
vertex_info * softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key); + + struct sp_fragment_shader_variant * softpipe_find_fs_variant(struct softpipe_context *softpipe, struct sp_fragment_shader *fs, diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 583d0bd9f7b..fd688089a3e 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -25,8 +25,10 @@ * **************************************************************************/ +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pstipple.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" @@ -242,12 +244,15 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) static void -update_fragment_shader(struct softpipe_context *softpipe) +update_fragment_shader(struct softpipe_context *softpipe, unsigned prim) { struct sp_fragment_shader_variant_key key; memset(&key, 0, sizeof(key)); + if (prim == PIPE_PRIM_TRIANGLES) + key.polygon_stipple = softpipe->rasterizer->poly_stipple_enable; + if (softpipe->fs) { softpipe->fs_variant = softpipe_find_fs_variant(softpipe, softpipe->fs, &key); @@ -276,11 +281,63 @@ update_fragment_shader(struct softpipe_context *softpipe) } +/** + * This should be called when the polygon stipple pattern changes. + * We create a new texture from the stipple pattern and create a new + * sampler view. 
+ */ +static void +update_polygon_stipple_pattern(struct softpipe_context *softpipe) +{ + struct pipe_resource *tex; + struct pipe_sampler_view *view; + + tex = util_pstipple_create_stipple_texture(&softpipe->pipe, + softpipe->poly_stipple.stipple); + pipe_resource_reference(&softpipe->pstipple.texture, tex); + + view = util_pstipple_create_sampler_view(&softpipe->pipe, tex); + pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, view); +} + + +/** + * Should be called when polygon stipple is enabled/disabled or when + * the fragment shader changes. + * We add/update the fragment sampler and sampler views to sample from + * the polygon stipple texture. The texture unit that we use depends on + * the fragment shader (we need to use a unit not otherwise used by the + * shader). + */ +static void +update_polygon_stipple_enable(struct softpipe_context *softpipe, unsigned prim) +{ + if (prim == PIPE_PRIM_TRIANGLES && + softpipe->fs_variant->key.polygon_stipple) { + const unsigned unit = softpipe->fs_variant->stipple_sampler_unit; + + assert(unit >= softpipe->num_fragment_samplers); + + /* sampler state */ + softpipe->fragment_samplers[unit] = softpipe->pstipple.sampler; + + /* sampler view */ + pipe_sampler_view_reference(&softpipe->fragment_sampler_views[unit], + softpipe->pstipple.sampler_view); + + sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[unit], + softpipe->pstipple.sampler_view); + + softpipe->dirty |= SP_NEW_SAMPLER; + } +} + + /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. 
*/ void -softpipe_update_derived(struct softpipe_context *softpipe) +softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim) { struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen); @@ -290,10 +347,23 @@ softpipe_update_derived(struct softpipe_context *softpipe) softpipe->tex_timestamp = sp_screen->timestamp; softpipe->dirty |= SP_NEW_TEXTURE; } - + +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->dirty & SP_NEW_STIPPLE) + /* before updating samplers! */ + update_polygon_stipple_pattern(softpipe); +#endif + if (softpipe->dirty & (SP_NEW_RASTERIZER | SP_NEW_FS)) - update_fragment_shader(softpipe); + update_fragment_shader(softpipe, prim); + +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_STIPPLE | + SP_NEW_FS)) + update_polygon_stipple_enable(softpipe, prim); +#endif if (softpipe->dirty & (SP_NEW_SAMPLER | SP_NEW_TEXTURE | diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c index ddb9a98b45f..da895270aa9 100644 --- a/src/gallium/drivers/softpipe/sp_state_shader.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -33,6 +33,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_inlines.h" +#include "util/u_pstipple.h" #include "draw/draw_context.h" #include "draw/draw_vs.h" #include "draw/draw_gs.h" @@ -51,7 +52,15 @@ create_fs_variant(struct softpipe_context *softpipe, const struct sp_fragment_shader_variant_key *key) { struct sp_fragment_shader_variant *var; - struct pipe_shader_state *curfs = &fs->shader; + struct pipe_shader_state *stipple_fs = NULL, *curfs = &fs->shader; + unsigned unit = 0; + + if (key->polygon_stipple) { + /* get new shader that implements polygon stippling */ + stipple_fs = util_pstipple_create_fragment_shader(&softpipe->pipe, + curfs, &unit); + curfs = stipple_fs; + } /* codegen, create variant object */ var = softpipe_create_fs_variant_sse(softpipe, curfs); @@ -62,6 +71,7 @@ 
create_fs_variant(struct softpipe_context *softpipe, if (var) { var->key = *key; var->tokens = tgsi_dup_tokens(curfs->tokens); + var->stipple_sampler_unit = unit; tgsi_scan_shader(var->tokens, &var->info); @@ -82,6 +92,11 @@ create_fs_variant(struct softpipe_context *softpipe, fs->variants = var; } + if (stipple_fs) { + free((void *) stipple_fs->tokens); + free(stipple_fs); + } + return var; } From 50e32fefb1140a42101b1154d3df78db4906ee38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 21 Jul 2011 22:31:24 +0200 Subject: [PATCH 041/600] configure.ac: check for libdrm_radeon only when building classic --- configure.ac | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index f72db119fb3..86ba87b39e8 100644 --- a/configure.ac +++ b/configure.ac @@ -1073,11 +1073,6 @@ AC_SUBST([MESA_MODULES]) AC_SUBST([HAVE_XF86VIDMODE]) -PKG_CHECK_MODULES([LIBDRM_RADEON], - [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED], - HAVE_LIBDRM_RADEON=yes, - HAVE_LIBDRM_RADEON=no) - dnl dnl More GLX setup dnl @@ -1270,6 +1265,11 @@ esac case $DRI_DIRS in *radeon*|*r200*|*r300*|*r600*) + PKG_CHECK_MODULES([LIBDRM_RADEON], + [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED], + HAVE_LIBDRM_RADEON=yes, + HAVE_LIBDRM_RADEON=no) + if test "x$HAVE_LIBDRM_RADEON" = xyes; then RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS" RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS From a87afba50529c6ae6762a3da68d4f31bc431e064 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 21 Jul 2011 13:53:34 -0400 Subject: [PATCH 042/600] Revert "g3dvl: Preserve previously rendered components for MC output." This reverts commit b56daf71d2f63d044d4c53ab49c6f87e02991a28. The bug is actually in softpipe's blend and writemask interaction. 
--- src/gallium/auxiliary/vl/vl_mc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index 81a05b539f3..e5ae0f72c4c 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -591,7 +591,7 @@ vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface) } static void -prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned component, unsigned mask) +prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned mask) { struct vl_mc *renderer; @@ -600,7 +600,7 @@ prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned component, unsign renderer = buffer->renderer; renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state); - if (buffer->surface_cleared || component > 0) + if (buffer->surface_cleared) renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add[mask]); else renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear[mask]); @@ -616,7 +616,7 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref) assert(buffer && ref); - prepare_pipe_4_rendering(buffer, 0, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B); + prepare_pipe_4_rendering(buffer, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B); renderer = buffer->renderer; @@ -644,7 +644,7 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num if (num_instances == 0) return; - prepare_pipe_4_rendering(buffer, component, mask); + prepare_pipe_4_rendering(buffer, mask); renderer = buffer->renderer; From 12c22cab77f35a887d9f6790e0de4a8fa4b3b575 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Thu, 7 Jul 2011 13:03:45 -0700 Subject: [PATCH 043/600] mesa: Add an ifndef guard around the definition of the INLINE macro Several Mesa headers redundantly define the INLINE macro. Adding this guard prevents the compiler from complaining about macro redefinition. 
Reviewed-by: Brian Paul Reviewed-by: Kenneth Graunke Reviewed-by: Chad Versace --- src/mesa/main/compiler.h | 42 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index 743841be4ef..d736fdfc58a 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -139,26 +139,28 @@ extern "C" { /** * Function inlining */ -#if defined(__GNUC__) -# define INLINE __inline__ -#elif defined(__MSC__) -# define INLINE __inline -#elif defined(_MSC_VER) -# define INLINE __inline -#elif defined(__ICL) -# define INLINE __inline -#elif defined(__INTEL_COMPILER) -# define INLINE inline -#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) -# define INLINE __inline -#elif defined(__SUNPRO_C) && defined(__C99FEATURES__) -# define INLINE inline -# define __inline inline -# define __inline__ inline -#elif (__STDC_VERSION__ >= 199901L) /* C99 */ -# define INLINE inline -#else -# define INLINE +#ifndef INLINE +# if defined(__GNUC__) +# define INLINE __inline__ +# elif defined(__MSC__) +# define INLINE __inline +# elif defined(_MSC_VER) +# define INLINE __inline +# elif defined(__ICL) +# define INLINE __inline +# elif defined(__INTEL_COMPILER) +# define INLINE inline +# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +# define INLINE __inline +# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) +# define INLINE inline +# define __inline inline +# define __inline__ inline +# elif (__STDC_VERSION__ >= 199901L) /* C99 */ +# define INLINE inline +# else +# define INLINE +# endif #endif From f129f618fe8a5397774484f1b7afb42d4be809a0 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Thu, 7 Jul 2011 14:01:40 -0700 Subject: [PATCH 044/600] glsl: Move functions into standalone_scaffolding.cpp for later reuse. 
This patch moves the following functions from main.cpp (the main cpp file for the standalone executable that is used to create the built-in functions) to standalone_scaffolding.cpp, so that they can be re-used in other standalone executables: - initialize_context()* - _mesa_new_shader() - _mesa_reference_shader() *initialize_context contained some code that was specific to main.cpp, so it was split into two functions: initialize_context() (which remains in main.cpp), and initialize_context_to_defaults() (which is in standalone_scaffolding.cpp). --- src/glsl/Makefile | 3 +- src/glsl/main.cpp | 60 +------------------ src/glsl/standalone_scaffolding.cpp | 91 +++++++++++++++++++++++++++++ src/glsl/standalone_scaffolding.h | 54 +++++++++++++++++ 4 files changed, 150 insertions(+), 58 deletions(-) create mode 100644 src/glsl/standalone_scaffolding.cpp create mode 100644 src/glsl/standalone_scaffolding.h diff --git a/src/glsl/Makefile b/src/glsl/Makefile index d1422c2a4d6..edfb35eb0b8 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -95,7 +95,8 @@ GLSL2_C_SOURCES = \ ../mesa/program/hash_table.c \ ../mesa/program/symbol_table.c GLSL2_CXX_SOURCES = \ - main.cpp + main.cpp \ + standalone_scaffolding.cpp GLSL2_OBJECTS = \ $(GLSL2_C_SOURCES:.c=.o) \ diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index 7952bb1a3e3..9f85096e1a1 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -29,80 +29,26 @@ #include "ir_print_visitor.h" #include "program.h" #include "loop_analysis.h" - -extern "C" struct gl_shader * -_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type); - -extern "C" void -_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, - struct gl_shader *sh); - -/* Copied from shader_api.c for the stand-alone compiler.
- */ -void -_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, - struct gl_shader *sh) -{ - *ptr = sh; -} - -struct gl_shader * -_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type) -{ - struct gl_shader *shader; - - (void) ctx; - - assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER); - shader = rzalloc(NULL, struct gl_shader); - if (shader) { - shader->Type = type; - shader->Name = name; - shader->RefCount = 1; - } - return shader; -} +#include "standalone_scaffolding.h" static void initialize_context(struct gl_context *ctx, gl_api api) { - memset(ctx, 0, sizeof(*ctx)); - - ctx->API = api; - - ctx->Extensions.ARB_ES2_compatibility = GL_TRUE; - ctx->Extensions.ARB_draw_buffers = GL_TRUE; - ctx->Extensions.ARB_draw_instanced = GL_TRUE; - ctx->Extensions.ARB_fragment_coord_conventions = GL_TRUE; - ctx->Extensions.EXT_texture_array = GL_TRUE; - ctx->Extensions.NV_texture_rectangle = GL_TRUE; - ctx->Extensions.EXT_texture3D = GL_TRUE; + initialize_context_to_defaults(ctx, api); /* GLSL 1.30 isn't fully supported, but we need to advertise 1.30 so that * the built-in functions for 1.30 can be built. */ ctx->Const.GLSLVersion = 130; - /* 1.10 minimums. */ - ctx->Const.MaxLights = 8; ctx->Const.MaxClipPlanes = 8; - ctx->Const.MaxTextureUnits = 2; + ctx->Const.MaxDrawBuffers = 2; /* More than the 1.10 minimum to appease parser tests taken from * apps that (hopefully) already checked the number of coords. 
*/ ctx->Const.MaxTextureCoordUnits = 4; - ctx->Const.VertexProgram.MaxAttribs = 16; - ctx->Const.VertexProgram.MaxUniformComponents = 512; - ctx->Const.MaxVarying = 8; - ctx->Const.MaxVertexTextureImageUnits = 0; - ctx->Const.MaxCombinedTextureImageUnits = 2; - ctx->Const.MaxTextureImageUnits = 2; - ctx->Const.FragmentProgram.MaxUniformComponents = 64; - - ctx->Const.MaxDrawBuffers = 2; - ctx->Driver.NewShader = _mesa_new_shader; } diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp new file mode 100644 index 00000000000..696ea757e96 --- /dev/null +++ b/src/glsl/standalone_scaffolding.cpp @@ -0,0 +1,91 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/* This file defines stripped-down versions of functions that + * normally exist outside of the glsl folder, so that they can be used + * when running the GLSL compiler standalone (for unit testing or + * compiling builtins). + */ + +#include "standalone_scaffolding.h" + +#include <assert.h> +#include <string.h> +#include "ralloc.h" + +void +_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, + struct gl_shader *sh) +{ + *ptr = sh; +} + +struct gl_shader * +_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type) +{ + struct gl_shader *shader; + + (void) ctx; + + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER); + shader = rzalloc(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Name = name; + shader->RefCount = 1; + } + return shader; +} + +void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) +{ + memset(ctx, 0, sizeof(*ctx)); + + ctx->API = api; + + ctx->Extensions.ARB_ES2_compatibility = true; + ctx->Extensions.ARB_draw_buffers = true; + ctx->Extensions.ARB_draw_instanced = true; + ctx->Extensions.ARB_fragment_coord_conventions = true; + ctx->Extensions.EXT_texture_array = true; + ctx->Extensions.NV_texture_rectangle = true; + ctx->Extensions.EXT_texture3D = true; + + ctx->Const.GLSLVersion = 120; + + /* 1.20 minimums.
*/ + ctx->Const.MaxLights = 8; + ctx->Const.MaxClipPlanes = 6; + ctx->Const.MaxTextureUnits = 2; + ctx->Const.MaxTextureCoordUnits = 2; + ctx->Const.VertexProgram.MaxAttribs = 16; + + ctx->Const.VertexProgram.MaxUniformComponents = 512; + ctx->Const.MaxVarying = 8; /* == gl_MaxVaryingFloats / 4 */ + ctx->Const.MaxVertexTextureImageUnits = 0; + ctx->Const.MaxCombinedTextureImageUnits = 2; + ctx->Const.MaxTextureImageUnits = 2; + ctx->Const.FragmentProgram.MaxUniformComponents = 64; + + ctx->Const.MaxDrawBuffers = 1; +} diff --git a/src/glsl/standalone_scaffolding.h b/src/glsl/standalone_scaffolding.h new file mode 100644 index 00000000000..87733200670 --- /dev/null +++ b/src/glsl/standalone_scaffolding.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/* This file declares stripped-down versions of functions that + * normally exist outside of the glsl folder, so that they can be used + * when running the GLSL compiler standalone (for unit testing or + * compiling builtins). + */ + +#pragma once +#ifndef STANDALONE_SCAFFOLDING_H +#define STANDALONE_SCAFFOLDING_H + +#include "main/mtypes.h" + +extern "C" void +_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, + struct gl_shader *sh); + +extern "C" struct gl_shader * +_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type); + +/** + * Initialize the given gl_context structure to a reasonable set of + * defaults representing the minimum capabilities required by the + * OpenGL spec. + * + * This is used when compiling builtin functions and in testing, when + * we don't have a connection to an actual driver. + */ +void initialize_context_to_defaults(struct gl_context *ctx, gl_api api); + + +#endif /* STANDALONE_SCAFFOLDING_H */ From f1f76e157ed1ba554fc3a0172113997344049e07 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 29 Jun 2011 12:30:04 -0700 Subject: [PATCH 045/600] glsl: Create a standalone executable for testing optimization passes. This patch adds a new build artifact, glsl_test, which can be used for testing optimization passes in isolation. I'm hoping that we will be able to add other useful standalone tests to this executable in the future. Accordingly, it is built in a modular fashion: the main() function uses its first argument to determine which test function to invoke, removes that argument from argv[], and then calls that function to interpret the rest of the command line arguments and perform the test. Currently the only test function is "optpass", which tests optimization passes. 
--- src/glsl/.gitignore | 1 + src/glsl/Makefile | 24 +++- src/glsl/test.cpp | 78 +++++++++++ src/glsl/test_optpass.cpp | 273 ++++++++++++++++++++++++++++++++++++++ src/glsl/test_optpass.h | 30 +++++ 5 files changed, 403 insertions(+), 3 deletions(-) create mode 100644 src/glsl/test.cpp create mode 100644 src/glsl/test_optpass.cpp create mode 100644 src/glsl/test_optpass.h diff --git a/src/glsl/.gitignore b/src/glsl/.gitignore index dfbd572d894..d26839a3e3e 100644 --- a/src/glsl/.gitignore +++ b/src/glsl/.gitignore @@ -5,3 +5,4 @@ glsl_parser.h glsl_parser.output builtin_function.cpp builtin_compiler +glsl_test diff --git a/src/glsl/Makefile b/src/glsl/Makefile index edfb35eb0b8..005b51d724b 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -89,7 +89,7 @@ CXX_SOURCES = \ LIBS = \ $(TOP)/src/glsl/libglsl.a -APPS = glsl_compiler glcpp/glcpp +APPS = glsl_compiler glsl_test glcpp/glcpp GLSL2_C_SOURCES = \ ../mesa/program/hash_table.c \ @@ -102,6 +102,19 @@ GLSL2_OBJECTS = \ $(GLSL2_C_SOURCES:.c=.o) \ $(GLSL2_CXX_SOURCES:.cpp=.o) +TEST_C_SOURCES = \ + ../mesa/program/hash_table.c \ + ../mesa/program/symbol_table.c + +TEST_CXX_SOURCES = \ + standalone_scaffolding.cpp \ + test.cpp \ + test_optpass.cpp + +TEST_OBJECTS = \ + $(TEST_C_SOURCES:.c=.o) \ + $(TEST_CXX_SOURCES:.cpp=.o) + ### Basic defines ### DEFINES += \ @@ -130,7 +143,9 @@ ALL_SOURCES = \ $(C_SOURCES) \ $(CXX_SOURCES) \ $(GLSL2_CXX_SOURCES) \ - $(GLSL2_C_SOURCES) + $(GLSL2_C_SOURCES) \ + $(TEST_CXX_SOURCES) \ + $(TEST_C_SOURCES) ##### TARGETS ##### @@ -152,7 +167,7 @@ depend: $(ALL_SOURCES) Makefile # Remove .o and backup files clean: clean-dricore - rm -f $(GLCPP_OBJECTS) $(GLSL2_OBJECTS) $(OBJECTS) lib$(LIBNAME).a depend depend.bak builtin_function.cpp builtin_function.o builtin_stubs.o builtin_compiler + rm -f $(GLCPP_OBJECTS) $(GLSL2_OBJECTS) $(TEST_OBJECTS) $(OBJECTS) lib$(LIBNAME).a depend depend.bak builtin_function.cpp builtin_function.o builtin_stubs.o builtin_compiler -rm -f $(APPS) 
clean-dricore: @@ -175,6 +190,9 @@ install-dricore: default glsl_compiler: $(GLSL2_OBJECTS) libglsl.a builtin_stubs.o $(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLSL2_OBJECTS) builtin_stubs.o $(LIBS) -o $@ +glsl_test: $(TEST_OBJECTS) libglsl.a builtin_stubs.o + $(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(TEST_OBJECTS) builtin_stubs.o $(LIBS) -o $@ + glcpp: glcpp/glcpp glcpp/glcpp: $(GLCPP_OBJECTS) $(APP_CC) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLCPP_OBJECTS) -o $@ diff --git a/src/glsl/test.cpp b/src/glsl/test.cpp new file mode 100644 index 00000000000..b1ff92ed1d4 --- /dev/null +++ b/src/glsl/test.cpp @@ -0,0 +1,78 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file test.cpp + * + * Standalone tests for the GLSL compiler. + * + * This file provides a standalone executable which can be used to + * test components of the GLSL. 
+ * + * Each test is a function with the same signature as main(). The + * main function interprets its first argument as the name of the test + * to run, strips out that argument, and then calls the test function. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "test_optpass.h" + +/** + * Print proper usage and exit with failure. + */ +static void +usage_fail(const char *name) +{ + printf("*** usage: %s <command> <options>\n", name); + printf("\n"); + printf("Possible commands are:\n"); + printf(" optpass: test an optimization pass in isolation\n"); + exit(EXIT_FAILURE); +} + +static const char *extract_command_from_argv(int *argc, char **argv) +{ + if (*argc < 2) { + usage_fail(argv[0]); + } + const char *command = argv[1]; + --*argc; + memmove(&argv[1], &argv[2], (*argc) * sizeof(argv[1])); + return command; +} + +int main(int argc, char **argv) +{ + const char *command = extract_command_from_argv(&argc, argv); + if (strcmp(command, "optpass") == 0) { + return test_optpass(argc, argv); + } else { + usage_fail(argv[0]); + } + + /* Execution should never reach here. */ + return EXIT_FAILURE; +} diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp new file mode 100644 index 00000000000..89b7f8338dc --- /dev/null +++ b/src/glsl/test_optpass.cpp @@ -0,0 +1,273 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file test_optpass.cpp + * + * Standalone test for optimization passes. + * + * This file provides the "optpass" command for the standalone + * glsl_test app. It accepts either GLSL or high-level IR as input, + * and performs the optimization passes specified on the command line. + * It outputs the IR, both before and after optimizations. + */ + +#include <string> +#include <iostream> +#include <sstream> +#include <getopt.h> + +#include "ast.h" +#include "ir_optimization.h" +#include "ir_print_visitor.h" +#include "program.h" +#include "ir_reader.h" +#include "standalone_scaffolding.h" + +using namespace std; + +static string read_stdin_to_eof() +{ + stringbuf sb; + cin.get(sb, '\0'); + return sb.str(); +} + +static GLboolean +do_optimization(struct exec_list *ir, const char *optimization) +{ + int int_0; + int int_1; + int int_2; + int int_3; + int int_4; + + if (sscanf(optimization, "do_common_optimization ( %d , %d ) ", + &int_0, &int_1) == 2) { + return do_common_optimization(ir, int_0 != 0, int_1); + } else if (strcmp(optimization, "do_algebraic") == 0) { + return do_algebraic(ir); + } else if (strcmp(optimization, "do_constant_folding") == 0) { + return do_constant_folding(ir); + } else if (strcmp(optimization, "do_constant_variable") == 0) { + return do_constant_variable(ir); + } else if (strcmp(optimization, "do_constant_variable_unlinked") == 0) { + return do_constant_variable_unlinked(ir); + } else if (strcmp(optimization, "do_copy_propagation") == 0) { + return do_copy_propagation(ir); + } else if
(strcmp(optimization, "do_copy_propagation_elements") == 0) { + return do_copy_propagation_elements(ir); + } else if (strcmp(optimization, "do_constant_propagation") == 0) { + return do_constant_propagation(ir); + } else if (strcmp(optimization, "do_dead_code") == 0) { + return do_dead_code(ir); + } else if (strcmp(optimization, "do_dead_code_local") == 0) { + return do_dead_code_local(ir); + } else if (strcmp(optimization, "do_dead_code_unlinked") == 0) { + return do_dead_code_unlinked(ir); + } else if (strcmp(optimization, "do_dead_functions") == 0) { + return do_dead_functions(ir); + } else if (strcmp(optimization, "do_function_inlining") == 0) { + return do_function_inlining(ir); + } else if (sscanf(optimization, + "do_lower_jumps ( %d , %d , %d , %d , %d ) ", + &int_0, &int_1, &int_2, &int_3, &int_4) == 5) { + return do_lower_jumps(ir, int_0 != 0, int_1 != 0, int_2 != 0, + int_3 != 0, int_4 != 0); + } else if (strcmp(optimization, "do_lower_texture_projection") == 0) { + return do_lower_texture_projection(ir); + } else if (strcmp(optimization, "do_if_simplification") == 0) { + return do_if_simplification(ir); + } else if (strcmp(optimization, "do_discard_simplification") == 0) { + return do_discard_simplification(ir); + } else if (sscanf(optimization, "lower_if_to_cond_assign ( %d ) ", + &int_0) == 1) { + return lower_if_to_cond_assign(ir, int_0); + } else if (strcmp(optimization, "do_mat_op_to_vec") == 0) { + return do_mat_op_to_vec(ir); + } else if (strcmp(optimization, "do_noop_swizzle") == 0) { + return do_noop_swizzle(ir); + } else if (strcmp(optimization, "do_structure_splitting") == 0) { + return do_structure_splitting(ir); + } else if (strcmp(optimization, "do_swizzle_swizzle") == 0) { + return do_swizzle_swizzle(ir); + } else if (strcmp(optimization, "do_tree_grafting") == 0) { + return do_tree_grafting(ir); + } else if (strcmp(optimization, "do_vec_index_to_cond_assign") == 0) { + return do_vec_index_to_cond_assign(ir); + } else if 
(strcmp(optimization, "do_vec_index_to_swizzle") == 0) { + return do_vec_index_to_swizzle(ir); + } else if (strcmp(optimization, "lower_discard") == 0) { + return lower_discard(ir); + } else if (sscanf(optimization, "lower_instructions ( %d ) ", + &int_0) == 1) { + return lower_instructions(ir, int_0); + } else if (strcmp(optimization, "lower_noise") == 0) { + return lower_noise(ir); + } else if (sscanf(optimization, "lower_variable_index_to_cond_assign " + "( %d , %d , %d , %d ) ", &int_0, &int_1, &int_2, + &int_3) == 4) { + return lower_variable_index_to_cond_assign(ir, int_0 != 0, int_1 != 0, + int_2 != 0, int_3 != 0); + } else if (sscanf(optimization, "lower_quadop_vector ( %d ) ", + &int_0) == 1) { + return lower_quadop_vector(ir, int_0 != 0); + } else if (strcmp(optimization, "optimize_redundant_jumps") == 0) { + return optimize_redundant_jumps(ir); + } else { + printf("Unrecognized optimization %s\n", optimization); + exit(EXIT_FAILURE); + return false; + } +} + +static GLboolean +do_optimization_passes(struct exec_list *ir, char **optimizations, + int num_optimizations, bool quiet) +{ + GLboolean overall_progress = false; + + for (int i = 0; i < num_optimizations; ++i) { + const char *optimization = optimizations[i]; + if (!quiet) { + printf("*** Running optimization %s...", optimization); + } + GLboolean progress = do_optimization(ir, optimization); + if (!quiet) { + printf("%s\n", progress ? 
"progress" : "no progress"); + } + validate_ir_tree(ir); + + overall_progress = overall_progress || progress; + } + + return overall_progress; +} + +int test_optpass(int argc, char **argv) +{ + int input_format_ir = 0; /* 0=glsl, 1=ir */ + int loop = 0; + int shader_type = GL_VERTEX_SHADER; + int quiet = 0; + + const struct option optpass_opts[] = { + { "input-ir", no_argument, &input_format_ir, 1 }, + { "input-glsl", no_argument, &input_format_ir, 0 }, + { "loop", no_argument, &loop, 1 }, + { "vertex-shader", no_argument, &shader_type, GL_VERTEX_SHADER }, + { "fragment-shader", no_argument, &shader_type, GL_FRAGMENT_SHADER }, + { "quiet", no_argument, &quiet, 1 }, + { NULL, 0, NULL, 0 } + }; + + int idx = 0; + int c; + while ((c = getopt_long(argc, argv, "", optpass_opts, &idx)) != -1) { + if (c != 0) { + printf("*** usage: %s optpass \n", argv[0]); + printf("\n"); + printf("Possible options are:\n"); + printf(" --input-ir: input format is IR\n"); + printf(" --input-glsl: input format is GLSL (the default)\n"); + printf(" --loop: run optimizations repeatedly until no progress\n"); + printf(" --vertex-shader: test with a vertex shader (the default)\n"); + printf(" --fragment-shader: test with a fragment shader\n"); + exit(EXIT_FAILURE); + } + } + + struct gl_context local_ctx; + struct gl_context *ctx = &local_ctx; + initialize_context_to_defaults(ctx, API_OPENGL); + + ctx->Driver.NewShader = _mesa_new_shader; + + struct gl_shader *shader = rzalloc(NULL, struct gl_shader); + shader->Type = shader_type; + + string input = read_stdin_to_eof(); + + struct _mesa_glsl_parse_state *state + = new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader); + + if (input_format_ir) { + shader->ir = new(shader) exec_list; + _mesa_glsl_initialize_types(state); + _mesa_glsl_read_ir(state, shader->ir, input.c_str(), true); + } else { + shader->Source = input.c_str(); + const char *source = shader->Source; + state->error = preprocess(state, &source, &state->info_log, + 
state->extensions, ctx->API) != 0; + + if (!state->error) { + _mesa_glsl_lexer_ctor(state, source); + _mesa_glsl_parse(state); + _mesa_glsl_lexer_dtor(state); + } + + shader->ir = new(shader) exec_list; + if (!state->error && !state->translation_unit.is_empty()) + _mesa_ast_to_hir(shader->ir, state); + } + + /* Print out the initial IR */ + if (!state->error && !quiet) { + printf("*** pre-optimization IR:\n"); + _mesa_print_ir(shader->ir, state); + printf("\n--\n"); + } + + /* Optimization passes */ + if (!state->error) { + GLboolean progress; + do { + progress = do_optimization_passes(shader->ir, &argv[optind], + argc - optind, quiet != 0); + } while (loop && progress); + } + + /* Print out the resulting IR */ + if (!state->error) { + if (!quiet) { + printf("*** resulting IR:\n"); + } + _mesa_print_ir(shader->ir, state); + if (!quiet) { + printf("\n--\n"); + } + } + + if (state->error) { + printf("*** error(s) occurred:\n"); + printf("%s\n", state->info_log); + printf("--\n"); + } + + ralloc_free(state); + ralloc_free(shader); + + return state->error; +} + diff --git a/src/glsl/test_optpass.h b/src/glsl/test_optpass.h new file mode 100644 index 00000000000..923ccf3dece --- /dev/null +++ b/src/glsl/test_optpass.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef TEST_OPTPASS_H +#define TEST_OPTPASS_H + +int test_optpass(int argc, char **argv); + +#endif /* TEST_OPTPASS_H */ From 659cdedb532e675da5676d40ee39278aadd8f0a1 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Tue, 5 Jul 2011 11:52:06 -0700 Subject: [PATCH 046/600] glsl: Add unit tests for lower_jumps.cpp These tests invoke do_lower_jumps() in isolation (using the glsl_test executable) and verify that it transforms the IR in the expected way. The unit tests may be run from the top level directory using "make check". For reference, I've also checked in the Python script create_test_cases.py, which was used to generate these tests. It is not necessary to run this script in order to run the tests. 
Acked-by: Chad Versace --- Makefile | 6 +- src/glsl/tests/compare_ir | 59 ++ src/glsl/tests/lower_jumps/.gitignore | 1 + .../tests/lower_jumps/create_test_cases.py | 643 ++++++++++++++++++ .../tests/lower_jumps/lower_breaks_1.opt_test | 13 + .../lower_breaks_1.opt_test.expected | 5 + .../tests/lower_jumps/lower_breaks_2.opt_test | 15 + .../lower_breaks_2.opt_test.expected | 7 + .../tests/lower_jumps/lower_breaks_3.opt_test | 17 + .../lower_breaks_3.opt_test.expected | 8 + .../tests/lower_jumps/lower_breaks_4.opt_test | 15 + .../lower_breaks_4.opt_test.expected | 7 + .../tests/lower_jumps/lower_breaks_5.opt_test | 16 + .../lower_breaks_5.opt_test.expected | 7 + .../tests/lower_jumps/lower_breaks_6.opt_test | 29 + .../lower_breaks_6.opt_test.expected | 29 + .../lower_guarded_conditional_break.opt_test | 21 + ...uarded_conditional_break.opt_test.expected | 20 + .../lower_pulled_out_jump.opt_test | 28 + .../lower_pulled_out_jump.opt_test.expected | 25 + .../lower_jumps/lower_returns_1.opt_test | 12 + .../lower_returns_1.opt_test.expected | 4 + .../lower_jumps/lower_returns_2.opt_test | 13 + .../lower_returns_2.opt_test.expected | 5 + .../lower_jumps/lower_returns_3.opt_test | 20 + .../lower_returns_3.opt_test.expected | 21 + .../lower_jumps/lower_returns_4.opt_test | 14 + .../lower_returns_4.opt_test.expected | 16 + .../lower_returns_main_false.opt_test | 17 + ...lower_returns_main_false.opt_test.expected | 8 + .../lower_returns_main_true.opt_test | 17 + .../lower_returns_main_true.opt_test.expected | 13 + .../lower_returns_sub_false.opt_test | 16 + .../lower_returns_sub_false.opt_test.expected | 8 + .../lower_returns_sub_true.opt_test | 16 + .../lower_returns_sub_true.opt_test.expected | 13 + .../lower_unified_returns.opt_test | 26 + .../lower_unified_returns.opt_test.expected | 21 + .../remove_continue_at_end_of_loop.opt_test | 13 + ..._continue_at_end_of_loop.opt_test.expected | 5 + ...void_at_end_of_loop_lower_nothing.opt_test | 16 + 
...nd_of_loop_lower_nothing.opt_test.expected | 8 + ..._void_at_end_of_loop_lower_return.opt_test | 16 + ...end_of_loop_lower_return.opt_test.expected | 19 + ...nd_of_loop_lower_return_and_break.opt_test | 16 + ...p_lower_return_and_break.opt_test.expected | 19 + ...void_at_end_of_loop_lower_nothing.opt_test | 14 + ...nd_of_loop_lower_nothing.opt_test.expected | 6 + ..._void_at_end_of_loop_lower_return.opt_test | 14 + ...end_of_loop_lower_return.opt_test.expected | 11 + ...nd_of_loop_lower_return_and_break.opt_test | 14 + ...p_lower_return_and_break.opt_test.expected | 11 + src/glsl/tests/optimization-test | 28 + src/glsl/tests/sexps.py | 103 +++ 54 files changed, 1543 insertions(+), 1 deletion(-) create mode 100755 src/glsl/tests/compare_ir create mode 100644 src/glsl/tests/lower_jumps/.gitignore create mode 100644 src/glsl/tests/lower_jumps/create_test_cases.py create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_1.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_2.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_3.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_4.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_5.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_6.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected create mode 100755 
src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_returns_1.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_returns_2.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_returns_3.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_returns_4.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/lower_unified_returns.opt_test create mode 100644 src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test create mode 100644 src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test create mode 100644 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected create mode 100755 
src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test create mode 100644 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test create mode 100644 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test create mode 100644 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test create mode 100644 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected create mode 100755 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test create mode 100644 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected create mode 100755 src/glsl/tests/optimization-test create mode 100644 src/glsl/tests/sexps.py diff --git a/Makefile b/Makefile index 817f3d3706a..916c498416d 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,10 @@ all: default doxygen: cd doxygen && $(MAKE) +check: + cd src/glsl/tests/ && ./optimization-test + @echo "All tests passed." 
+ clean: -@touch $(TOP)/configs/current -@for dir in $(SUBDIRS) ; do \ @@ -51,7 +55,7 @@ install: done -.PHONY: default doxygen clean realclean distclean install +.PHONY: default doxygen clean realclean distclean install check # If there's no current configuration file $(TOP)/configs/current: diff --git a/src/glsl/tests/compare_ir b/src/glsl/tests/compare_ir new file mode 100755 index 00000000000..a40fc810cf3 --- /dev/null +++ b/src/glsl/tests/compare_ir @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Compare two files containing IR code. Ignore formatting differences +# and declaration order. 
+ +import os +import os.path +import subprocess +import sys +import tempfile + +from sexps import * + +if len(sys.argv) != 3: + print 'Usage: compare_ir ' + exit(1) + +with open(sys.argv[1]) as f: + ir1 = sort_decls(parse_sexp(f.read())) +with open(sys.argv[2]) as f: + ir2 = sort_decls(parse_sexp(f.read())) + +if ir1 == ir2: + exit(0) +else: + file1, path1 = tempfile.mkstemp(os.path.basename(sys.argv[1])) + file2, path2 = tempfile.mkstemp(os.path.basename(sys.argv[2])) + try: + os.write(file1, '{0}\n'.format(sexp_to_string(ir1))) + os.close(file1) + os.write(file2, '{0}\n'.format(sexp_to_string(ir2))) + os.close(file2) + subprocess.call(['diff', '-u', path1, path2]) + finally: + os.remove(path1) + os.remove(path2) + exit(1) diff --git a/src/glsl/tests/lower_jumps/.gitignore b/src/glsl/tests/lower_jumps/.gitignore new file mode 100644 index 00000000000..f47cb2045f1 --- /dev/null +++ b/src/glsl/tests/lower_jumps/.gitignore @@ -0,0 +1 @@ +*.out diff --git a/src/glsl/tests/lower_jumps/create_test_cases.py b/src/glsl/tests/lower_jumps/create_test_cases.py new file mode 100644 index 00000000000..fbc6f0a84ea --- /dev/null +++ b/src/glsl/tests/lower_jumps/create_test_cases.py @@ -0,0 +1,643 @@ +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +import os +import os.path +import re +import subprocess +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py, which is in parent dir +from sexps import * + +def make_test_case(f_name, ret_type, body): + """Create a simple optimization test case consisting of a single + function with the given name, return type, and body. + + Global declarations are automatically created for any undeclared + variables that are referenced by the function. All undeclared + variables are assumed to be floats. + """ + check_sexp(body) + declarations = {} + def make_declarations(sexp, already_declared = ()): + if isinstance(sexp, list): + if len(sexp) == 2 and sexp[0] == 'var_ref': + if sexp[1] not in already_declared: + declarations[sexp[1]] = [ + 'declare', ['in'], 'float', sexp[1]] + elif len(sexp) == 4 and sexp[0] == 'assign': + assert sexp[2][0] == 'var_ref' + if sexp[2][1] not in already_declared: + declarations[sexp[2][1]] = [ + 'declare', ['out'], 'float', sexp[2][1]] + make_declarations(sexp[3], already_declared) + else: + already_declared = set(already_declared) + for s in sexp: + if isinstance(s, list) and len(s) >= 4 and \ + s[0] == 'declare': + already_declared.add(s[3]) + else: + make_declarations(s, already_declared) + make_declarations(body) + return declarations.values() + \ + [['function', f_name, ['signature', ret_type, ['parameters'], body]]] + + +# The following functions can be used to build expressions. 
+ +def const_float(value): + """Create an expression representing the given floating point value.""" + return ['constant', 'float', ['{0:.6f}'.format(value)]] + +def const_bool(value): + """Create an expression representing the given boolean value. + + If value is not a boolean, it is converted to a boolean. So, for + instance, const_bool(1) is equivalent to const_bool(True). + """ + return ['constant', 'bool', ['{0}'.format(1 if value else 0)]] + +def gt_zero(var_name): + """Construct the expression var_name > 0""" + return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)] + + +# The following functions can be used to build complex control flow +# statements. All of these functions return statement lists (even +# those which only create a single statement), so that statements can +# be sequenced together using the '+' operator. + +def return_(value = None): + """Create a return statement.""" + if value is not None: + return [['return', value]] + else: + return [['return']] + +def break_(): + """Create a break statement.""" + return ['break'] + +def continue_(): + """Create a continue statement.""" + return ['continue'] + +def simple_if(var_name, then_statements, else_statements = None): + """Create a statement of the form + + if (var_name > 0.0) { + <then_statements> + } else { + <else_statements> + } + + else_statements may be omitted. + """ + if else_statements is None: + else_statements = [] + check_sexp(then_statements) + check_sexp(else_statements) + return [['if', gt_zero(var_name), then_statements, else_statements]] + +def loop(statements): + """Create a loop containing the given statements as its loop + body. + """ + check_sexp(statements) + return [['loop', [], [], [], [], statements]] + +def declare_temp(var_type, var_name): + """Create a declaration of the form + + (declare (temporary) to the variable + . The assignment uses the mask (x). 
+ """ + check_sexp(value) + return [['assign', ['x'], ['var_ref', var_name], value]] + +def complex_if(var_prefix, statements): + """Create a statement of the form + + if (a > 0.0) { + if (b > 0.0) { + + } + } + + This is useful in testing jump lowering, because if + ends in a jump, lower_jumps.cpp won't try to combine this + construct with the code that follows it, as it might do for a + simple if. + + All variables used in the if statement are prefixed with + var_prefix. This can be used to ensure uniqueness. + """ + check_sexp(statements) + return simple_if(var_prefix + 'a', simple_if(var_prefix + 'b', statements)) + +def declare_execute_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean execute_flag. + """ + return declare_temp('bool', 'execute_flag') + \ + assign_x('execute_flag', const_bool(True)) + +def declare_return_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean return_flag. + """ + return declare_temp('bool', 'return_flag') + \ + assign_x('return_flag', const_bool(False)) + +def declare_return_value(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary variable return_value. Assume that + return_value is a float. + """ + return declare_temp('float', 'return_value') + +def declare_break_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean break_flag. + """ + return declare_temp('bool', 'break_flag') + \ + assign_x('break_flag', const_bool(False)) + +def lowered_return_simple(value = None): + """Create the statements that lower_jumps.cpp lowers a return + statement to, in situations where it does not need to clear the + execute flag. 
+ """ + if value: + result = assign_x('return_value', value) + else: + result = [] + return result + assign_x('return_flag', const_bool(True)) + +def lowered_return(value = None): + """Create the statements that lower_jumps.cpp lowers a return + statement to, in situations where it needs to clear the execute + flag. + """ + return lowered_return_simple(value) + \ + assign_x('execute_flag', const_bool(False)) + +def lowered_continue(): + """Create the statement that lower_jumps.cpp lowers a continue + statement to. + """ + return assign_x('execute_flag', const_bool(False)) + +def lowered_break_simple(): + """Create the statement that lower_jumps.cpp lowers a break + statement to, in situations where it does not need to clear the + execute flag. + """ + return assign_x('break_flag', const_bool(True)) + +def lowered_break(): + """Create the statement that lower_jumps.cpp lowers a break + statement to, in situations where it needs to clear the execute + flag. + """ + return lowered_break_simple() + assign_x('execute_flag', const_bool(False)) + +def if_execute_flag(statements): + """Wrap statements in an if test so that they will only execute if + execute_flag is True. + """ + check_sexp(statements) + return [['if', ['var_ref', 'execute_flag'], statements, []]] + +def if_not_return_flag(statements): + """Wrap statements in an if test so that they will only execute if + return_flag is False. + """ + check_sexp(statements) + return [['if', ['var_ref', 'return_flag'], [], statements]] + +def final_return(): + """Create the return statement that lower_jumps.cpp places at the + end of a function when lowering returns. + """ + return [['return', ['var_ref', 'return_value']]] + +def final_break(): + """Create the conditional break statement that lower_jumps.cpp + places at the end of a loop when lowering breaks. 
+ """ + return [['if', ['var_ref', 'break_flag'], break_(), []]] + +def bash_quote(*args): + """Quote the arguments appropriately so that bash will understand + each argument as a single word. + """ + def quote_word(word): + for c in word: + if not (c.isalpha() or c.isdigit() or c in '@%_-+=:,./'): + break + else: + if not word: + return "''" + return word + return "'{0}'".format(word.replace("'", "'\"'\"'")) + return ' '.join(quote_word(word) for word in args) + +def create_test_case(doc_string, input_sexp, expected_sexp, test_name, + pull_out_jumps=False, lower_sub_return=False, + lower_main_return=False, lower_continue=False, + lower_break=False): + """Create a test case that verifies that do_lower_jumps transforms + the given code in the expected way. + """ + doc_lines = [line.strip() for line in doc_string.splitlines()] + doc_string = ''.join('# {0}\n'.format(line) for line in doc_lines if line != '') + check_sexp(input_sexp) + check_sexp(expected_sexp) + input_str = sexp_to_string(sort_decls(input_sexp)) + expected_output = sexp_to_string(sort_decls(expected_sexp)) + + optimization = ( + 'do_lower_jumps({0:d}, {1:d}, {2:d}, {3:d}, {4:d})'.format( + pull_out_jumps, lower_sub_return, lower_main_return, + lower_continue, lower_break)) + args = ['../../glsl_test', 'optpass', '--quiet', '--input-ir', optimization] + test_file = '{0}.opt_test'.format(test_name) + with open(test_file, 'w') as f: + f.write('#!/bin/bash\n#\n# This file was generated by create_test_cases.py.\n#\n') + f.write(doc_string) + f.write('{0} < (var_ref b) (constant float (0.000000))) (break) + ()))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected new file mode 100644 index 00000000000..a4cb2d6a125 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected @@ -0,0 +1,7 @@ +((declare (in) float b) (declare (out) float a) + (function main + (signature void (parameters) + ((loop () 
() () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) (break) + ()))))))) diff --git a/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test new file mode 100755 index 00000000000..4149360b5d0 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test @@ -0,0 +1,17 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If a loop contains a conditional break at the bottom of it, +# it should not be lowered if it is in the then-clause, even if +# there are statements preceding the break. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' < (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref c) (constant float (1.000000))) break) + ()))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected new file mode 100644 index 00000000000..325f7b49a5d --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected @@ -0,0 +1,8 @@ +((declare (in) float b) (declare (out) float a) (declare (out) float c) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref c) (constant float (1.000000))) break) + ()))))))) diff --git a/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test new file mode 100755 index 00000000000..70458bb4f8e --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test @@ -0,0 +1,15 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If a loop contains a conditional break at the bottom of it, +# it should not be lowered if it is in the else-clause. 
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' < (var_ref b) (constant float (0.000000))) () + (break)))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected new file mode 100644 index 00000000000..a7735457cb8 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected @@ -0,0 +1,7 @@ +((declare (in) float b) (declare (out) float a) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) () + (break)))))))) diff --git a/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test new file mode 100755 index 00000000000..da9eef1105e --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test @@ -0,0 +1,16 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If a loop contains a conditional break at the bottom of it, +# it should not be lowered if it is in the else-clause, even if +# there are statements preceding the break. 
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' < (var_ref b) (constant float (0.000000))) () + ((assign (x) (var_ref c) (constant float (1.000000))) break)))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected new file mode 100644 index 00000000000..0dd4a529383 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected @@ -0,0 +1,7 @@ +((declare (in) float b) (declare (out) float a) (declare (out) float c) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) () + ((assign (x) (var_ref c) (constant float (1.000000))) break)))))))) diff --git a/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test new file mode 100755 index 00000000000..9440dfec897 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test @@ -0,0 +1,29 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If a loop contains conditional breaks and continues, and +# ends in an unconditional break, then the unconditional break +# needs to be lowered, because it will no longer be at the end +# of the loop after the final break is added. 
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 1, 1)' < (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref ba) (constant float (0.000000))) + ((if (expression bool > (var_ref bb) (constant float (0.000000))) + (continue) + ())) + ()) + (if (expression bool > (var_ref ca) (constant float (0.000000))) + ((if (expression bool > (var_ref cb) (constant float (0.000000))) + (break) + ())) + ())) + ()) + break)))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected new file mode 100644 index 00000000000..8222328e00c --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected @@ -0,0 +1,29 @@ +((declare (in) float a) (declare (in) float ba) (declare (in) float bb) + (declare (in) float ca) + (declare (in) float cb) + (function main + (signature void (parameters) + ((declare (temporary) bool break_flag) + (assign (x) (var_ref break_flag) (constant bool (0))) + (loop () () () () + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref ba) (constant float (0.000000))) + ((if (expression bool > (var_ref bb) (constant float (0.000000))) + ((assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref execute_flag) + ((if (expression bool > (var_ref ca) (constant float (0.000000))) + ((if (expression bool > (var_ref cb) (constant float (0.000000))) + ((assign (x) (var_ref break_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ())) + ())) + ()) + (if (var_ref execute_flag) + ((assign (x) (var_ref break_flag) (constant bool (1)))) + ()) + (if (var_ref break_flag) (break) ()))))))) diff --git a/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test 
b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test new file mode 100755 index 00000000000..379aa59b5a2 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test @@ -0,0 +1,21 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Normally a conditional break at the end of a loop isn't +# lowered, however if the conditional break gets placed inside +# an if(execute_flag) because of earlier lowering of continues, +# then the break needs to be lowered. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 1, 1)' < (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + (continue) + ())) + ()) + (if (expression bool > (var_ref b) (constant float (0.000000))) (break) + ()))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected new file mode 100644 index 00000000000..7c6e73f77f8 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected @@ -0,0 +1,20 @@ +((declare (in) float aa) (declare (in) float ab) (declare (in) float b) + (function main + (signature void (parameters) + ((declare (temporary) bool break_flag) + (assign (x) (var_ref break_flag) (constant bool (0))) + (loop () () () () + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (if (expression bool > (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + ((assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref execute_flag) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref break_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref break_flag) (break) ()))))))) diff --git 
a/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test new file mode 100755 index 00000000000..15f3c41d5a2 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test @@ -0,0 +1,28 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If one branch of an if ends in a jump, and control cannot +# fall out the bottom of the other branch, and pull_out_jumps is +# True, then the jump is lifted outside the if. +# Verify that this lowering occurs during the same pass as the +# lowering of other jumps by checking that extra temporary +# variables aren't generated. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(1, 0, 1, 0, 0)' < (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + ((return)) + ())) + ()) + (loop () () () () + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((if (expression bool > (var_ref c) (constant float (0.000000))) (break) + (continue))) + ((return))))) + (assign (x) (var_ref d) (constant float (1.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected new file mode 100644 index 00000000000..bf45c2c93b6 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected @@ -0,0 +1,25 @@ +((declare (in) float aa) (declare (in) float ab) (declare (in) float b) + (declare (in) float c) + (declare (out) float d) + (function main + (signature void (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + ((assign (x) (var_ref return_flag) (constant 
bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref execute_flag) + ((loop () () () () + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((if (expression bool > (var_ref c) (constant float (0.000000))) () + (continue))) + ((assign (x) (var_ref return_flag) (constant bool (1))))) + break)) + (if (var_ref return_flag) () + ((assign (x) (var_ref d) (constant float (1.000000)))))) + ()))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_1.opt_test b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test new file mode 100755 index 00000000000..a1f895bbf78 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test @@ -0,0 +1,12 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that a void return at the end of a function is +# eliminated. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' < (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return (constant float (1.000000)))) + ())) + ()) + (return (constant float (2.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected new file mode 100644 index 00000000000..d4835e96b7c --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected @@ -0,0 +1,21 @@ +((declare (in) float a) (declare (in) float b) + (function sub + (signature float (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) float return_value) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref return_value) (constant float (1.000000))) + (assign (x) (var_ref 
return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref execute_flag) + ((assign (x) (var_ref return_value) (constant float (2.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ()) + (return (var_ref return_value)))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_4.opt_test b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test new file mode 100755 index 00000000000..9f54c67a180 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test @@ -0,0 +1,14 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that returns are properly lowered when they occur in +# both branches of an if-statement. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' < (var_ref a) (constant float (0.000000))) + ((return (constant float (1.000000)))) + ((return (constant float (2.000000))))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected new file mode 100644 index 00000000000..b551a066f43 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected @@ -0,0 +1,16 @@ +((declare (in) float a) + (function sub + (signature float (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) float return_value) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref a) (constant float (0.000000))) + ((assign (x) (var_ref return_value) (constant float (1.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ((assign (x) (var_ref return_value) (constant float (2.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref 
execute_flag) (constant bool (0))))) + (return (var_ref return_value)))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test new file mode 100755 index 00000000000..5f97bfd3f5a --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test @@ -0,0 +1,17 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that do_lower_jumps respects the lower_main_return +# flag in deciding whether to lower returns in the main +# function. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' < (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected new file mode 100644 index 00000000000..e8b36f14478 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected @@ -0,0 +1,8 @@ +((declare (in) float a) (declare (in) float b) + (function main + (signature void (parameters) + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test new file mode 100755 index 00000000000..59c7ba1dd52 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test @@ -0,0 +1,17 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that do_lower_jumps respects the lower_main_return +# flag in deciding whether to lower returns in the main +# function. 
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' < (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected new file mode 100644 index 00000000000..e15a97d1db2 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected @@ -0,0 +1,13 @@ +((declare (in) float a) (declare (in) float b) + (function main + (signature void (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test new file mode 100755 index 00000000000..40e784e3318 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test @@ -0,0 +1,16 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that do_lower_jumps respects the lower_sub_return flag +# in deciding whether to lower returns in subroutines. 
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' < (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected new file mode 100644 index 00000000000..07db6e708f4 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected @@ -0,0 +1,8 @@ +((declare (in) float a) (declare (in) float b) + (function sub + (signature void (parameters) + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test new file mode 100755 index 00000000000..9fe6b90f085 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test @@ -0,0 +1,16 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that do_lower_jumps respects the lower_sub_return flag +# in deciding whether to lower returns in subroutines. 
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' < (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected new file mode 100644 index 00000000000..31109802351 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected @@ -0,0 +1,13 @@ +((declare (in) float a) (declare (in) float b) + (function sub + (signature void (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()))))) diff --git a/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test new file mode 100755 index 00000000000..e7168131487 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test @@ -0,0 +1,26 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If both branches of an if statement end in a return, and +# pull_out_jumps is True, then those returns should be lifted +# outside the if and then properly lowered. +# Verify that this lowering occurs during the same pass as the +# lowering of other returns by checking that extra temporary +# variables aren't generated. 
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(1, 0, 1, 0, 0)' < (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + ((return)) + ())) + ()) + (if (expression bool > (var_ref b) (constant float (0.000000))) + ((if (expression bool > (var_ref c) (constant float (0.000000))) + ((return)) + ((return)))) + ()))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected new file mode 100644 index 00000000000..271cd3b462e --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected @@ -0,0 +1,21 @@ +((declare (in) float aa) (declare (in) float ab) (declare (in) float b) + (declare (in) float c) + (function main + (signature void (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + ((assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref execute_flag) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((if (expression bool > (var_ref c) (constant float (0.000000))) () ()) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()))))) diff --git a/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test new file mode 100755 index 00000000000..18efc37f6e1 --- /dev/null +++ b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test @@ -0,0 +1,13 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. 
+# +# Test that a redundant continue-statement at the end of a +# loop is removed. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' < "$test.out" 2>&1 + total=$((total+1)) + if ./compare_ir "$test.expected" "$test.out" >/dev/null 2>&1; then + echo "PASS" + pass=$((pass+1)) + else + echo "FAIL" + ./compare_ir "$test.expected" "$test.out" + fi +done + +echo "" +echo "$pass/$total tests returned correct results" +echo "" + +if [[ $pass == $total ]]; then + exit 0 +else + exit 1 +fi diff --git a/src/glsl/tests/sexps.py b/src/glsl/tests/sexps.py new file mode 100644 index 00000000000..a714af8d236 --- /dev/null +++ b/src/glsl/tests/sexps.py @@ -0,0 +1,103 @@ +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# This file contains helper functions for manipulating sexps in Python. 
+# +# We represent a sexp in Python using nested lists containing strings. +# So, for example, the sexp (constant float (1.000000)) is represented +# as ['constant', 'float', ['1.000000']]. + +import re + +def check_sexp(sexp): + """Verify that the argument is a proper sexp. + + That is, raise an exception if the argument is not a string or a + list, or if it contains anything that is not a string or a list at + any nesting level. + """ + if isinstance(sexp, list): + for s in sexp: + check_sexp(s) + elif not isinstance(sexp, basestring): + raise Exception('Not a sexp: {0!r}'.format(sexp)) + +def parse_sexp(sexp): + """Convert a string, of the form that would be output by mesa, + into a sexp represented as nested lists containing strings. + """ + sexp_token_regexp = re.compile( + '[a-zA-Z_]+(@[0-9]+)?|[0-9]+(\\.[0-9]+)?|[^ \n]') + stack = [[]] + for match in sexp_token_regexp.finditer(sexp): + token = match.group(0) + if token == '(': + stack.append([]) + elif token == ')': + if len(stack) == 1: + raise Exception('Unmatched )') + sexp = stack.pop() + stack[-1].append(sexp) + else: + stack[-1].append(token) + if len(stack) != 1: + raise Exception('Unmatched (') + if len(stack[0]) != 1: + raise Exception('Multiple sexps') + return stack[0][0] + +def sexp_to_string(sexp): + """Convert a sexp, represented as nested lists containing strings, + into a single string of the form parseable by mesa. + """ + if isinstance(sexp, basestring): + return sexp + assert isinstance(sexp, list) + result = '' + for s in sexp: + sub_result = sexp_to_string(s) + if result == '': + result = sub_result + elif '\n' not in result and '\n' not in sub_result and \ + len(result) + len(sub_result) + 1 <= 70: + result += ' ' + sub_result + else: + result += '\n' + sub_result + return '({0})'.format(result.replace('\n', '\n ')) + +def sort_decls(sexp): + """Sort all toplevel variable declarations in sexp. 
+ + This is used to work around the fact that + ir_reader::read_instructions reorders declarations. + """ + assert isinstance(sexp, list) + decls = [] + other_code = [] + for s in sexp: + if isinstance(s, list) and len(s) >= 4 and s[0] == 'declare': + decls.append(s) + else: + other_code.append(s) + return sorted(decls) + other_code + From edaadd94cbae45dc3c1c5481e346f729ea2bc73b Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 22 Jul 2011 10:38:05 -0700 Subject: [PATCH 047/600] glsl: Add standalone_scaffolding.cpp to SConscript. --- src/glsl/SConscript | 1 + 1 file changed, 1 insertion(+) diff --git a/src/glsl/SConscript b/src/glsl/SConscript index ea104abb823..1da58a91f91 100644 --- a/src/glsl/SConscript +++ b/src/glsl/SConscript @@ -96,6 +96,7 @@ glsl_sources = [ 'opt_tree_grafting.cpp', 'ralloc.c', 's_expression.cpp', + 'standalone_scaffolding.cpp', 'strtod.c', ] From eb7590f6772db844d95ed4937ab7c98a3f412a28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Tue, 19 Jul 2011 15:58:21 -0700 Subject: [PATCH 048/600] util: Store alpha value too. --- src/gallium/auxiliary/util/u_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 004df439ff5..2d6193039a7 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -730,7 +730,7 @@ debug_dump_float_rgba_bmp(const char *filename, pixel.rgbRed = float_to_ubyte(ptr[x*4 + 0]); pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]); pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]); - pixel.rgbAlpha = 255; + pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]); os_stream_write(stream, &pixel, 4); } } From 0a1d49504de4d34b003625ee7c901667afa43dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 20 Jul 2011 14:39:23 -0700 Subject: [PATCH 049/600] llvmpipe: Unit tests for arithmetic functions. 
Conflicts: src/gallium/drivers/llvmpipe/SConscript --- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 5 +- src/gallium/drivers/llvmpipe/lp_test_arit.c | 294 ++++++++++++++++++++ 3 files changed, 298 insertions(+), 2 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_test_arit.c diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index ba9705bebee..f9301354fc5 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -51,6 +51,7 @@ C_SOURCES = \ CPP_SOURCES = \ PROGS := lp_test_format \ + lp_test_arit \ lp_test_blend \ lp_test_conv \ lp_test_printf \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index d6b20ceb5ce..2b232a524ae 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -85,11 +85,12 @@ if not env['embedded']: env.Prepend(LIBS = [llvmpipe] + gallium) tests = [ + 'arit', 'format', 'blend', 'conv', - 'printf', - 'sincos', + 'printf', + 'sincos', ] if not env['msvc']: diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c new file mode 100644 index 00000000000..f0e43e0f9cc --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c @@ -0,0 +1,294 @@ +/************************************************************************** + * + * Copyright 2011 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include +#include +#include + +#include "util/u_pointer.h" +#include "util/u_memory.h" + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_arit.h" + +#include "lp_test.h" + + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +typedef float (*unary_func_t)(float); + + +/** + * Describe a test case of one unary function. + */ +struct unary_test_t +{ + /* + * Test name -- name of the mathematical function under test. + */ + + const char *name; + + LLVMValueRef + (*builder)(struct lp_build_context *bld, LLVMValueRef a); + + /* + * Reference (pure-C) function. + */ + float + (*ref)(float a); + + /* + * Test values. 
+ */ + const float *values; + unsigned num_values; +}; + + +const float exp2_values[] = { + -60, + -4, + -2, + -1, + -1e-007, + 0, + 1e-007, + 1, + 2, + 4, + 60 +}; + + +const float log2_values[] = { +#if 0 + /* + * Smallest denormalized number; meant just for experimentation, but not + * validation. + */ + 1.4012984643248171e-45, +#endif + 1e-007, + 0.5, + 1, + 2, + 4, + 100000, + 1e+018 +}; + + +static float rsqrtf(float x) +{ + return 1.0/sqrt(x); +} + + +const float rsqrt_values[] = { + -1, -1e-007, + 1e-007, 1, + -4, -1, + 1, 4, + -1e+035, -100000, + 100000, 1e+035, +}; + + +const float sincos_values[] = { + -5*M_PI/4, + -4*M_PI/4, + -4*M_PI/4, + -3*M_PI/4, + -2*M_PI/4, + -1*M_PI/4, + 1*M_PI/4, + 2*M_PI/4, + 3*M_PI/4, + 4*M_PI/4, + 5*M_PI/4, +}; + + +/* + * Unary test cases. + */ + +static const struct unary_test_t unary_tests[] = { + {"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values)}, + {"log2", &lp_build_log2, &log2f, log2_values, Elements(log2_values)}, + {"exp", &lp_build_exp, &expf, exp2_values, Elements(exp2_values)}, + {"log", &lp_build_log, &logf, log2_values, Elements(log2_values)}, + {"rsqrt", &lp_build_rsqrt, &rsqrtf, rsqrt_values, Elements(rsqrt_values)}, + {"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values)}, + {"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values)}, +}; + + +/* + * Build LLVM function that exercises the unary operator builder. 
+ */ +static LLVMValueRef +build_unary_test_func(struct gallivm_state *gallivm, + LLVMModuleRef module, + LLVMContextRef context, + const struct unary_test_t *test) +{ + LLVMTypeRef i32t = LLVMInt32TypeInContext(context); + LLVMTypeRef f32t = LLVMFloatTypeInContext(context); + LLVMTypeRef v4f32t = LLVMVectorType(f32t, 4); + LLVMTypeRef args[1] = { f32t }; + LLVMValueRef func = LLVMAddFunction(module, test->name, LLVMFunctionType(f32t, args, Elements(args), 0)); + LLVMValueRef arg1 = LLVMGetParam(func, 0); + LLVMBuilderRef builder = gallivm->builder; + LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry"); + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + LLVMValueRef ret; + + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, lp_float32_vec4_type()); + + LLVMSetFunctionCallConv(func, LLVMCCallConv); + + LLVMPositionBuilderAtEnd(builder, block); + + /* scalar to vector */ + arg1 = LLVMBuildInsertElement(builder, LLVMGetUndef(v4f32t), arg1, index0, ""); + + ret = test->builder(&bld, arg1); + + /* vector to scalar */ + ret = LLVMBuildExtractElement(builder, ret, index0, ""); + + LLVMBuildRet(builder, ret); + return func; +} + + +/* + * Test one LLVM unary arithmetic builder function. 
+ */ +static boolean +test_unary(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, const struct unary_test_t *test) +{ + LLVMModuleRef module = gallivm->module; + LLVMValueRef test_func; + LLVMExecutionEngineRef engine = gallivm->engine; + LLVMContextRef context = gallivm->context; + char *error = NULL; + unary_func_t test_func_jit; + boolean success = TRUE; + int i; + + test_func = build_unary_test_func(gallivm, module, context, test); + + if (LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { + printf("LLVMVerifyModule: %s\n", error); + LLVMDumpModule(module); + abort(); + } + LLVMDisposeMessage(error); + + test_func_jit = (unary_func_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_func)); + + for (i = 0; i < test->num_values; ++i) { + float value = test->values[i]; + float ref = test->ref(value); + float src = test_func_jit(value); + + double error = fabs(src - ref); + double precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG; + + bool pass = precision >= 20.0; + + if (isnan(ref)) { + continue; + } + + if (!pass || verbose) { + printf("%s(%.9g): ref = %.9g, src = %.9g, precision = %f bits, %s\n", + test->name, value, ref, src, precision, + pass ? "PASS" : "FAIL"); + } + + if (!pass) { + success = FALSE; + } + } + + LLVMFreeMachineCodeForFunction(engine, test_func); + + return success; +} + + +boolean +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +{ + boolean success = TRUE; + int i; + + for (i = 0; i < Elements(unary_tests); ++i) { + if (!test_unary(gallivm, verbose, fp, &unary_tests[i])) { + success = FALSE; + } + } + + return success; +} + + +boolean +test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, + unsigned long n) +{ + /* + * Not randomly generated test cases, so test all. 
+ */ + + return test_all(gallivm, verbose, fp); +} + + +boolean +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +{ + return TRUE; +} From 1ac86e249e38b163a3c3cc1915e7de7877c08fb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Tue, 19 Jul 2011 15:58:09 -0700 Subject: [PATCH 050/600] gallivm: Fix lp_build_exp/lp_build_log. Never used so far -- we only used the base 2 variants -- which is why it went unnoticed so far. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 02b3bde7893..06e5debe4a3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -2151,7 +2151,7 @@ lp_build_exp(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); - return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); + return lp_build_exp2(bld, lp_build_mul(bld, log2e, x)); } @@ -2168,7 +2168,7 @@ lp_build_log(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); - return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); + return lp_build_mul(bld, log2, lp_build_log2(bld, x)); } From ef1a2765a45c03b3bf7b5994197a611bcef96e0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 20 Jul 2011 14:34:46 -0700 Subject: [PATCH 051/600] gallivm: Update minimax comments. 
--- src/gallium/auxiliary/gallivm/f.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/f.cpp b/src/gallium/auxiliary/gallivm/f.cpp index 5eb09c01ab3..6b9c35b3ce5 100644 --- a/src/gallium/auxiliary/gallivm/f.cpp +++ b/src/gallium/auxiliary/gallivm/f.cpp @@ -15,8 +15,9 @@ * * How to use this source: * - * - Download and abuild the NTL library from - * http://shoup.net/ntl/download.html + * - Download and build the NTL library from + * http://shoup.net/ntl/download.html , or install libntl-dev package if on + * Debian. * * - Download boost source code matching to your distro. * @@ -24,22 +25,32 @@ * * - Build as * - * g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a -lboost_math_tr1 + * g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a * * - Run as * * ./minimax * - * - For example, to compute exp2 5th order polynomial between [0, 1] do: + * - For example, to compute log2 5th order polynomial between [1, 2] do: + * + * variant 0 + * range 1 2 + * order 5 0 + * step 200 + * info + * + * and take the coefficients from the P = { ... } array. + * + * - To compute exp2 5th order polynomial between [0, 1] do: + * + * variant 1 + * range 0 1 + * order 5 0 + * step 200 + * info * * - For more info see - * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html + * http://www.boost.org/doc/libs/1_47_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html */ #define L22 From 47d6d44a231b811f1bba05478a6bbfb1e3fdb27b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 20 Jul 2011 14:41:17 -0700 Subject: [PATCH 052/600] gallivm: Increase lp_build_rsqrt() precision. Add an iteration step, which makes rsqrt precision go from 12 bits to 24, and fixes the RSQ/NRM test case of the PSPrecision/VSPrecision DCTs.
There are no uses of this function outside shader translation. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 06e5debe4a3..fce4685cc2d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1645,7 +1645,7 @@ lp_build_rsqrt(struct lp_build_context *bld, assert(type.floating); if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { - const unsigned num_iterations = 0; + const unsigned num_iterations = 1; LLVMValueRef res; unsigned i; From af82ff556cdd748f9f6b0d60d53afaaf369b1c5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 20 Jul 2011 14:53:08 -0700 Subject: [PATCH 053/600] gallivm: Fix lp_build_exp2 order 4-5 polynomial coefficients and bump order. Not sure how I computed these, but they were wrong (which explains why bumping the polynomial order before never improved precision). This allows the EXP test cases of the PSPrecision/VSPrecision DCTs to pass.
--- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 24 ++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index fce4685cc2d..0c075bafb2c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -61,7 +61,7 @@ #include "lp_bld_arit.h" -#define EXP_POLY_DEGREE 3 +#define EXP_POLY_DEGREE 5 #define LOG_POLY_DEGREE 5 @@ -2218,18 +2218,18 @@ lp_build_polynomial(struct lp_build_context *bld, */ const double lp_build_exp2_polynomial[] = { #if EXP_POLY_DEGREE == 5 - 0.999999999690134838155, - 0.583974334321735217258, - 0.164553105719676828492, - 0.0292811063701710962255, - 0.00354944426657875141846, - 0.000296253726543423377365 + 0.999999925063526176901, + 0.693153073200168932794, + 0.240153617044375388211, + 0.0558263180532956664775, + 0.00898934009049466391101, + 0.00187757667519147912699 #elif EXP_POLY_DEGREE == 4 - 1.00000001502262084505, - 0.563586057338685991394, - 0.150436017652442413623, - 0.0243220604213317927308, - 0.0025359088446580436489 + 1.00000259337069434683, + 0.693003834469974940458, + 0.24144275689150793076, + 0.0520114606103070150235, + 0.0135341679161270268764 #elif EXP_POLY_DEGREE == 3 0.999925218562710312959, 0.695833540494823811697, From 5161aff48af2fe0171be06fc727a000ad300fbd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 20 Jul 2011 14:53:59 -0700 Subject: [PATCH 054/600] gallivm: Add a note about log2 computation and denormalized numbers. 
--- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 0c075bafb2c..2be8598704e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -2465,6 +2465,12 @@ lp_build_log2_approx(struct lp_build_context *bld, assert(type.floating && type.width == 32); + /* + * We don't explicitly handle denormalized numbers. They will yield a + * result in the neighbourhood of -127, which appears to be adequate + * enough. + */ + i = LLVMBuildBitCast(builder, x, int_vec_type, ""); /* exp = (float) exponent(x) */ From 8d5f3cef795428d7a95120416122a39c10cff11c Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 Jul 2011 09:51:34 -0700 Subject: [PATCH 055/600] glsl: Move is_array_or_matrix outside visitor class There's no reason for it to be there, and another class that may not have access to the visitor will need it soon. 
Reviewed-by: Eric Anholt --- src/glsl/lower_variable_index_to_cond_assign.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index 8eb1612f0a0..45adb267f2c 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -37,6 +37,12 @@ #include "glsl_types.h" #include "main/macros.h" +static inline bool +is_array_or_matrix(const ir_instruction *ir) +{ + return (ir->type->is_array() || ir->type->is_matrix()); +} + struct assignment_generator { ir_instruction* base_ir; @@ -233,11 +239,6 @@ public: bool lower_temps; bool lower_uniforms; - bool is_array_or_matrix(const ir_instruction *ir) const - { - return (ir->type->is_array() || ir->type->is_matrix()); - } - bool needs_lowering(ir_dereference_array *deref) const { if (deref == NULL || deref->array_index->as_constant() From d2296e784aa8fad81c4910dcbbf61e826ce9a06a Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 Jul 2011 10:07:24 -0700 Subject: [PATCH 056/600] glsl: Split out part of variable_index_to_cond_assign_visitor::needs_lowering Other code will soon need to know if an array needs lowering based exclusively on the storage mode. 
Reviewed-by: Eric Anholt --- src/glsl/lower_variable_index_to_cond_assign.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index 45adb267f2c..c0b69c8f0b1 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -239,12 +239,8 @@ public: bool lower_temps; bool lower_uniforms; - bool needs_lowering(ir_dereference_array *deref) const + bool storage_type_needs_lowering(ir_dereference_array *deref) const { - if (deref == NULL || deref->array_index->as_constant() - || !is_array_or_matrix(deref->array)) - return false; - if (deref->array->ir_type == ir_type_constant) return this->lower_temps; @@ -268,6 +264,15 @@ public: return false; } + bool needs_lowering(ir_dereference_array *deref) const + { + if (deref == NULL || deref->array_index->as_constant() + || !is_array_or_matrix(deref->array)) + return false; + + return this->storage_type_needs_lowering(deref); + } + ir_variable *convert_dereference_array(ir_dereference_array *orig_deref, ir_assignment* orig_assign) { From 1731ac308631138ca98d34e8b7070c6e3f981939 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 Jul 2011 12:18:19 -0700 Subject: [PATCH 057/600] glsl: Rework lowering of non-constant array indexing The previous implementation could easily get tricked if the LHS of an assignment included a non-constant index that was "inside" another dereference. For example: mat4 m[2]; m[0][i] = vec4(0.0); Due to the way it tracked whether the array was being assigned, it would think that the non-constant index was in an r-value. The new code fixes that by tracking l-values and r-values differently. The index is also replaced by cloning the IR and replacing the index variable instead of the odd way it was done before. v2: Apply some simplifications suggested by Eric Anholt. 
Making assignment_generator::rvalue be ir_dereference instead of ir_rvalue simplified the code a bit. Fixes i965 piglit fs-temp-array-mat[234]-index-wr and vs-varying-array-mat[234]-index-wr. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34691 Reviewed-by: Eric Anholt --- .../lower_variable_index_to_cond_assign.cpp | 135 +++++++++++++++--- 1 file changed, 116 insertions(+), 19 deletions(-) diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index c0b69c8f0b1..107bcc67aed 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -29,6 +29,21 @@ * * Pre-DX10 GPUs often don't have a native way to do this operation, * and this works around that. + * + * The lowering process proceeds as follows. Each non-constant index + * found in an r-value is converted to a canonical form \c array[i]. Each + * element of the array is conditionally assigned to a temporary by comparing + * \c i to a constant index. This is done by cloning the canonical form and + * replacing all occurances of \c i with a constant. Each remaining occurance + * of the canonical form in the IR is replaced with a dereference of the + * temporary variable. + * + * L-values with non-constant indices are handled similarly. In this case, + * the RHS of the assignment is assigned to a temporary. The non-constant + * index is replace with the canonical form (just like for r-values). The + * temporary is conditionally assigned to each element of the canonical form + * by comparing \c i with each index. The same clone-and-replace scheme is + * used. */ #include "ir.h" @@ -43,10 +58,70 @@ is_array_or_matrix(const ir_instruction *ir) return (ir->type->is_array() || ir->type->is_matrix()); } +/** + * Replace a dereference of a variable with a specified r-value + * + * Each time a dereference of the specified value is replaced, the r-value + * tree is cloned. 
+ */ +class deref_replacer : public ir_rvalue_visitor { +public: + deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value) + : variable_to_replace(variable_to_replace), value(value), + progress(false) + { + assert(this->variable_to_replace != NULL); + assert(this->value != NULL); + } + + virtual void handle_rvalue(ir_rvalue **rvalue) + { + ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); + + if ((dv != NULL) && (dv->var == this->variable_to_replace)) { + this->progress = true; + *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); + } + } + + const ir_variable *variable_to_replace; + ir_rvalue *value; + bool progress; +}; + +/** + * Find a variable index dereference of an array in an rvalue tree + */ +class find_variable_index : public ir_hierarchical_visitor { +public: + find_variable_index() + : deref(NULL) + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *ir) + { + if (is_array_or_matrix(ir->array) + && (ir->array_index->as_constant() == NULL)) { + this->deref = ir; + return visit_stop; + } + + return visit_continue; + } + + /** + * First array dereference found in the tree that has a non-constant index. + */ + ir_dereference_array *deref; +}; + struct assignment_generator { ir_instruction* base_ir; - ir_rvalue* array; + ir_dereference *rvalue; + ir_variable *old_index; bool is_write; unsigned int write_mask; ir_variable* var; @@ -61,18 +136,23 @@ struct assignment_generator * underlying variable. 
*/ void *mem_ctx = ralloc_parent(base_ir); - ir_dereference *element = - new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, NULL), - new(mem_ctx) ir_constant(i)); - ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); - ir_assignment *assignment; - if (is_write) { - assignment = new(mem_ctx) ir_assignment(element, variable, condition, - write_mask); - } else { - assignment = new(mem_ctx) ir_assignment(variable, element, condition); - } + /* Clone the old r-value in its entirety. Then replace any occurances of + * the old variable index with the new constant index. + */ + ir_dereference *element = this->rvalue->clone(mem_ctx, NULL); + ir_constant *const index = new(mem_ctx) ir_constant(i); + deref_replacer r(this->old_index, index); + element->accept(&r); + assert(r.progress); + + /* Generate a conditional assignment to (or from) the constant indexed + * array dereference. + */ + ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); + ir_assignment *const assignment = (is_write) + ? 
new(mem_ctx) ir_assignment(element, variable, condition, write_mask) + : new(mem_ctx) ir_assignment(variable, element, condition); list->push_tail(assignment); } @@ -274,7 +354,8 @@ public: } ir_variable *convert_dereference_array(ir_dereference_array *orig_deref, - ir_assignment* orig_assign) + ir_assignment* orig_assign, + ir_dereference *orig_base) { assert(is_array_or_matrix(orig_deref->array)); @@ -320,9 +401,12 @@ public: new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL); base_ir->insert_before(assign); + orig_deref->array_index = lhs->clone(mem_ctx, NULL); + assignment_generator ag; - ag.array = orig_deref->array; + ag.rvalue = orig_base; ag.base_ir = base_ir; + ag.old_index = index; ag.var = var; if (orig_assign) { ag.is_write = true; @@ -342,12 +426,16 @@ public: virtual void handle_rvalue(ir_rvalue **pir) { + if (this->in_assignee) + return; + if (!*pir) return; ir_dereference_array* orig_deref = (*pir)->as_dereference_array(); if (needs_lowering(orig_deref)) { - ir_variable* var = convert_dereference_array(orig_deref, 0); + ir_variable *var = + convert_dereference_array(orig_deref, NULL, orig_deref); assert(var); *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var); this->progress = true; @@ -359,10 +447,11 @@ public: { ir_rvalue_visitor::visit_leave(ir); - ir_dereference_array *orig_deref = ir->lhs->as_dereference_array(); + find_variable_index f; + ir->lhs->accept(&f); - if (needs_lowering(orig_deref)) { - convert_dereference_array(orig_deref, ir); + if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) { + convert_dereference_array(f.deref, ir, ir->lhs); ir->remove(); this->progress = true; } @@ -383,7 +472,15 @@ lower_variable_index_to_cond_assign(exec_list *instructions, lower_temp, lower_uniform); - visit_list_elements(&v, instructions); + /* Continue lowering until no progress is made. 
If there are multiple + * levels of indirection (e.g., non-constant indexing of array elements and + * matrix columns of an array of matrix), each pass will only lower one + * level of indirection. + */ + do { + v.progress = false; + visit_list_elements(&v, instructions); + } while (v.progress); return v.progress; } From 5f83dfe5b70337bcffe215f7c32d0b862b5e7a3b Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sun, 17 Jul 2011 17:33:26 -0700 Subject: [PATCH 058/600] glsl: When lowering non-constant array indexing, respect existing conditions If the non-constant index was in the LHS of an assignment, any existing condition on that assignment would be lost. Fixes i965 piglit: fs-temp-array-mat[234]-col-row-wr fs-temp-array-mat[234]-index-col-row-wr fs-temp-array-mat[234]-index-col-wr fs-temp-array-mat[234]-index-row-wr vs-varying-array-mat[234]-index-col-wr Reviewed-by: Eric Anholt --- .../lower_variable_index_to_cond_assign.cpp | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index 107bcc67aed..e08ec13456b 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -417,9 +417,24 @@ public: switch_generator sg(ag, index, 4, 4); - exec_list list; - sg.generate(0, length, &list); - base_ir->insert_before(&list); + /* If the original assignment has a condition, respect that original + * condition! This is acomplished by wrapping the new conditional + * assignments in an if-statement that uses the original condition. + */ + if ((orig_assign != NULL) && (orig_assign->condition != NULL)) { + /* No need to clone the condition because the IR that it hangs on is + * going to be removed from the instruction sequence.
+ */ + ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition); + + sg.generate(0, length, &if_stmt->then_instructions); + base_ir->insert_before(if_stmt); + } else { + exec_list list; + + sg.generate(0, length, &list); + base_ir->insert_before(&list); + } return var; } From 601428d2bbcf650c746f7a10b47228948f0ea912 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sun, 17 Jul 2011 17:35:00 -0700 Subject: [PATCH 059/600] glsl: When lowering non-constant vector indexing, respect existing conditions If the non-constant index was in the LHS of an assignment, any existing condition on that assignment would be lost. Reviewed-by: Eric Anholt --- src/glsl/lower_vec_index_to_cond_assign.cpp | 29 +++++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/glsl/lower_vec_index_to_cond_assign.cpp b/src/glsl/lower_vec_index_to_cond_assign.cpp index 3c4d93201d2..15992e27288 100644 --- a/src/glsl/lower_vec_index_to_cond_assign.cpp +++ b/src/glsl/lower_vec_index_to_cond_assign.cpp @@ -171,21 +171,23 @@ ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir) assert(orig_deref->array_index->type->base_type == GLSL_TYPE_INT); + exec_list list; + /* Store the index to a temporary to avoid reusing its tree. */ index = new(ir) ir_variable(glsl_type::int_type, "vec_index_tmp_i", ir_var_temporary); - ir->insert_before(index); + list.push_tail(index); deref = new(ir) ir_dereference_variable(index); assign = new(ir) ir_assignment(deref, orig_deref->array_index, NULL); - ir->insert_before(assign); + list.push_tail(assign); /* Store the RHS to a temporary to avoid reusing its tree. */ var = new(ir) ir_variable(ir->rhs->type, "vec_index_tmp_v", ir_var_temporary); - ir->insert_before(var); + list.push_tail(var); deref = new(ir) ir_dereference_variable(var); assign = new(ir) ir_assignment(deref, ir->rhs, NULL); - ir->insert_before(assign); + list.push_tail(assign); /* Generate a conditional move of each vector element to the temp.
*/ for (i = 0; i < orig_deref->array->type->vector_elements; i++) { @@ -205,8 +207,25 @@ ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir) deref = new(ir) ir_dereference_variable(var); assign = new(ir) ir_assignment(swizzle, deref, condition); - ir->insert_before(assign); + list.push_tail(assign); } + + /* If the original assignment has a condition, respect that original + * condition! This is acomplished by wrapping the new conditional + * assignments in an if-statement that uses the original condition. + */ + if (ir->condition != NULL) { + /* No need to clone the condition because the IR that it hangs on is + * going to be removed from the instruction sequence. + */ + ir_if *if_stmt = new(mem_ctx) ir_if(ir->condition); + + list.move_nodes_to(&if_stmt->then_instructions); + ir->insert_before(if_stmt); + } else { + ir->insert_before(&list); + } + ir->remove(); this->progress = true; From d6e1a8f71437d4a65e65f93271b2892dd62b0d23 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sun, 17 Jul 2011 23:15:54 -0700 Subject: [PATCH 060/600] ir_to_mesa: Add each relative address to the previous This fixes many cases of accessing arrays of matrices using non-constant indices at each level. 
Fixes i965 piglit: vs-temp-array-mat[234]-index-col-rd vs-temp-array-mat[234]-index-col-row-rd vs-temp-array-mat[234]-index-col-wr vs-uniform-array-mat[234]-index-col-rd Fixes swrast piglit: fs-temp-array-mat[234]-index-col-rd fs-temp-array-mat[234]-index-col-row-rd fs-temp-array-mat[234]-index-col-wr fs-uniform-array-mat[234]-index-col-rd fs-uniform-array-mat[234]-index-col-row-rd fs-varying-array-mat[234]-index-col-rd fs-varying-array-mat[234]-index-col-row-rd vs-temp-array-mat[234]-index-col-rd vs-temp-array-mat[234]-index-col-row-rd vs-temp-array-mat[234]-index-col-wr vs-uniform-array-mat[234]-index-col-rd vs-uniform-array-mat[234]-index-col-row-rd vs-varying-array-mat[234]-index-col-rd vs-varying-array-mat[234]-index-col-row-rd vs-varying-array-mat[234]-index-col-wr Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index d8e5a3a9772..beb481b3a3b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1496,6 +1496,18 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir) this->result, src_reg_for_float(element_size)); } + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. 
+ */ + if (src.reladdr != NULL) { + src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, OPCODE_ADD, dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + src.reladdr = ralloc(mem_ctx, src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); } From f7cd9a858c043e609fcdbf9ac9dfc1ef7ad002bf Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sun, 17 Jul 2011 23:35:26 -0700 Subject: [PATCH 061/600] ir_to_mesa: Copy reladdr in src_reg(dst_reg) constructor Fixes i965 piglit: vs-temp-array-mat[234]-col-row-wr vs-temp-array-mat[234]-index-col-row-wr vs-temp-array-mat[234]-index-row-wr vs-temp-mat[234]-col-row-wr Fixes swrast piglit: fs-temp-array-mat[234]-col-row-wr fs-temp-array-mat[234]-index-col-row-wr fs-temp-array-mat[234]-index-row-wr fs-temp-mat[234]-col-row-wr vs-temp-array-mat[234]-col-row-wr vs-temp-array-mat[234]-index-col-row-wr vs-temp-array-mat[234]-index-row-wr vs-temp-mat[234]-col-row-wr Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index beb481b3a3b..8b4a535b75f 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -134,7 +134,7 @@ src_reg::src_reg(dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; - this->reladdr = NULL; + this->reladdr = reg.reladdr; } dst_reg::dst_reg(src_reg reg) From fbeb68e880318808f90c779cd3f8b8c4160eecf8 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 Jul 2011 18:02:17 -0700 Subject: [PATCH 062/600] prog_optimize: Set unused regs to PROGRAM_UNDEFINED after CMP->MOV conversion Leaving the unused registers with other values caused assertion failures and other problems in places that blindly iterate over all sources. brw_vs_emit.c:1381: get_src_reg: Assertion `c->regs[file][index].nr != 0' failed. 
Fixes i965 piglit: vs-uniform-array-mat[234]-col-row-rd vs-uniform-array-mat[234]-index-col-row-rd vs-uniform-array-mat[234]-index-row-rd vs-uniform-mat[234]-col-row-rd Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/program/prog_optimize.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 8a40fa69eca..f4a7a638d5f 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -1319,6 +1319,15 @@ _mesa_simplify_cmp(struct gl_program * program) inst->Opcode = OPCODE_MOV; inst->SrcReg[0] = inst->SrcReg[1]; + + /* Unused operands are expected to have the file set to + * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes + * all of the sources. + */ + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; } } if (dbg) { From 337e2dfad0bcd567755272271abd2593a1d0fd1f Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 Jul 2011 16:04:17 -0700 Subject: [PATCH 063/600] i965: When emitting a src/dst write of an output, keep the write mask Fixes i965 piglit: vs-varying-array-mat[234]-col-row-wr vs-varying-array-mat[234]-index-col-row-wr vs-varying-array-mat[234]-index-row-wr vs-varying-array-mat[234]-row-wr vs-varying-mat[234]-col-row-wr vs-varying-mat[234]-row-wr Reviewed-by: Eric Anholt Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 9d733344a26..5ef8b0720ba 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1993,7 +1993,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) index = inst->DstReg.Index; file = inst->DstReg.File; if (file == PROGRAM_OUTPUT && 
c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; + /* Can't just make get_dst "do the right thing" here because other + * callers of get_dst don't expect any special behavior for the + * c->output_regs[index].used_in_src case. + */ + dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask); else dst = get_dst(c, inst->DstReg); From 1d3f09f15998c60326bf6c53a8d32c82496264ae Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 Jul 2011 18:07:50 -0700 Subject: [PATCH 064/600] i965: When emitting a src/dst read of an output, keep the swizzle and neg Fixes i965 piglit vs-varying-array-mat[234]-row-rd. Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 5ef8b0720ba..d8cb0f7cb69 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1980,9 +1980,22 @@ void brw_vs_emit(struct brw_vs_compile *c ) const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) { + /* Can't just make get_arg "do the right thing" here because + * other callers of get_arg and get_src_reg don't expect any + * special behavior for the c->output_regs[index].used_in_src + * case. + */ + args[i] = c->output_regs[index].reg; + args[i].dw1.bits.swizzle = + BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); + + /* Note this is ok for non-swizzle ARB_vp instructions */ + args[i].negate = src->Negate ? 
1 : 0; + } else args[i] = get_arg(c, inst, i); } From 156f85336f80d542569f0b0182bd27c7f3218e70 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 Jul 2011 16:25:33 -0700 Subject: [PATCH 065/600] glsl: Treat ir_dereference_array of non-var as a constant for lowering Previously the code would just look at deref->array->type to see if it was a constant. This isn't good enough because deref->array might be another ir_dereference_array... of a constant. As a result, deref->array->type wouldn't be a constant, but deref->variable_referenced() would return NULL. The unchecked NULL pointer would shortly lead to a segfault. Instead just look at the return of deref->variable_referenced(). If it's NULL, assume that either a constant or some other form of anonymous temporary storage is being dereferenced. This is a bit hinkey because most drivers treat constant arrays as uniforms, but the lowering pass treats them as temporaries. This keeps the behavior of the old code, so this change isn't making things worse. Fixes i965 piglit: vs-temp-array-mat[234]-index-col-rd vs-temp-array-mat[234]-index-col-row-rd vs-uniform-array-mat[234]-index-col-rd vs-uniform-array-mat[234]-index-col-row-rd Reviewed-by: Eric Anholt --- src/glsl/lower_variable_index_to_cond_assign.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index e08ec13456b..79fa58ec8d5 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -321,10 +321,16 @@ public: bool storage_type_needs_lowering(ir_dereference_array *deref) const { - if (deref->array->ir_type == ir_type_constant) + /* If a variable isn't eventually the target of this dereference, then + * it must be a constant or some sort of anonymous temporary storage. + * + * FINISHME: Is this correct? Most drivers treat arrays of constants as + * FINISHME: uniforms. 
It seems like this should do the same. + */ + const ir_variable *const var = deref->array->variable_referenced(); + if (var == NULL) return this->lower_temps; - const ir_variable *const var = deref->array->variable_referenced(); switch (var->mode) { case ir_var_auto: case ir_var_temporary: From 90cc372400e1f5869baabd841823bbf9296d7b01 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 Jul 2011 18:48:39 -0700 Subject: [PATCH 066/600] glsl: Factor out code that generates block of index comparisons Reviewed-by: Eric Anholt --- src/glsl/ir_optimization.h | 4 + .../lower_variable_index_to_cond_assign.cpp | 111 +++++++++++------- 2 files changed, 73 insertions(+), 42 deletions(-) diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 59a040751d9..f7808bdda9a 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -69,3 +69,7 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions, bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform); bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); bool optimize_redundant_jumps(exec_list *instructions); + +ir_rvalue * +compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, void *mem_ctx); diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index 79fa58ec8d5..7792e6e624f 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -52,6 +52,70 @@ #include "glsl_types.h" #include "main/macros.h" +/** + * Generate a comparison value for a block of indices + * + * Lowering passes for non-constant indexing of arrays, matrices, or vectors + * can use this to generate blocks of index comparison values. 
+ * + * \param instructions List where new instructions will be appended + * \param index \c ir_variable containing the desired index + * \param base Base value for this block of comparisons + * \param components Number of unique index values to compare. This must + * be on the range [1, 4]. + * \param mem_ctx ralloc memory context to be used for all allocations. + * + * \returns + * An \c ir_rvalue that \b must be cloned for each use in conditional + * assignments, etc. + */ +ir_rvalue * +compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, void *mem_ctx) +{ + ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index); + + assert(index->type->is_scalar()); + assert(index->type->base_type == GLSL_TYPE_INT); + assert(components >= 1 && components <= 4); + + if (components > 1) { + const ir_swizzle_mask m = { 0, 0, 0, 0, components, false }; + broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m); + } + + /* Compare the desired index value with the next block of four indices. 
+ */ + ir_constant_data test_indices_data; + memset(&test_indices_data, 0, sizeof(test_indices_data)); + test_indices_data.i[0] = base; + test_indices_data.i[1] = base + 1; + test_indices_data.i[2] = base + 2; + test_indices_data.i[3] = base + 3; + + ir_constant *const test_indices = + new(mem_ctx) ir_constant(broadcast_index->type, + &test_indices_data); + + ir_rvalue *const condition_val = + new(mem_ctx) ir_expression(ir_binop_equal, + &glsl_type::bool_type[components - 1], + broadcast_index, + test_indices); + + ir_variable *const condition = + new(mem_ctx) ir_variable(condition_val->type, + "dereference_condition", + ir_var_temporary); + instructions->push_tail(condition); + + ir_rvalue *const cond_deref = + new(mem_ctx) ir_dereference_variable(condition); + instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0)); + + return cond_deref; +} + static inline bool is_array_or_matrix(const ir_instruction *ir) { @@ -204,54 +268,17 @@ struct switch_generator for (unsigned i = first; i < end; i += 4) { const unsigned comps = MIN2(condition_components, end - i); - ir_rvalue *broadcast_index = - new(this->mem_ctx) ir_dereference_variable(index); - - if (comps) { - const ir_swizzle_mask m = { 0, 0, 0, 0, comps, false }; - broadcast_index = new(this->mem_ctx) ir_swizzle(broadcast_index, m); - } - - /* Compare the desired index value with the next block of four indices. 
- */ - ir_constant_data test_indices_data; - memset(&test_indices_data, 0, sizeof(test_indices_data)); - test_indices_data.i[0] = i; - test_indices_data.i[1] = i + 1; - test_indices_data.i[2] = i + 2; - test_indices_data.i[3] = i + 3; - ir_constant *const test_indices = - new(this->mem_ctx) ir_constant(broadcast_index->type, - &test_indices_data); - - ir_rvalue *const condition_val = - new(this->mem_ctx) ir_expression(ir_binop_equal, - &glsl_type::bool_type[comps - 1], - broadcast_index, - test_indices); - - ir_variable *const condition = - new(this->mem_ctx) ir_variable(condition_val->type, - "dereference_array_condition", - ir_var_temporary); - list->push_tail(condition); - ir_rvalue *const cond_deref = - new(this->mem_ctx) ir_dereference_variable(condition); - list->push_tail(new(this->mem_ctx) ir_assignment(cond_deref, - condition_val, 0)); + compare_index_block(list, index, i, comps, this->mem_ctx); if (comps == 1) { - ir_rvalue *const cond_deref = - new(this->mem_ctx) ir_dereference_variable(condition); - - this->generator.generate(i, cond_deref, list); + this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL), + list); } else { for (unsigned j = 0; j < comps; j++) { - ir_rvalue *const cond_deref = - new(this->mem_ctx) ir_dereference_variable(condition); ir_rvalue *const cond_swiz = - new(this->mem_ctx) ir_swizzle(cond_deref, j, 0, 0, 0, 1); + new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL), + j, 0, 0, 0, 1); this->generator.generate(i + j, cond_swiz, list); } From 6c8f1f483a999005cae1da5b54cc8ca1904e7ce7 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 Jul 2011 18:51:25 -0700 Subject: [PATCH 067/600] glsl: Compare vector indices in blocks Just like the non-constant array index lowering pass, compare all N indices at once. For accesses to a vec4, this saves 3 comparison instructions on a vector architecture. 
Reviewed-by: Eric Anholt --- src/glsl/lower_vec_index_to_cond_assign.cpp | 62 +++++++++++++-------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/src/glsl/lower_vec_index_to_cond_assign.cpp b/src/glsl/lower_vec_index_to_cond_assign.cpp index 15992e27288..fce9c3424a1 100644 --- a/src/glsl/lower_vec_index_to_cond_assign.cpp +++ b/src/glsl/lower_vec_index_to_cond_assign.cpp @@ -71,8 +71,6 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(ir_rvalue ir_assignment *assign; ir_variable *index, *var; ir_dereference *deref; - ir_expression *condition; - ir_swizzle *swizzle; int i; if (!orig_deref) @@ -86,39 +84,52 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(ir_rvalue assert(orig_deref->array_index->type->base_type == GLSL_TYPE_INT); + exec_list list; + /* Store the index to a temporary to avoid reusing its tree. */ index = new(base_ir) ir_variable(glsl_type::int_type, "vec_index_tmp_i", ir_var_temporary); - base_ir->insert_before(index); + list.push_tail(index); deref = new(base_ir) ir_dereference_variable(index); assign = new(base_ir) ir_assignment(deref, orig_deref->array_index, NULL); - base_ir->insert_before(assign); + list.push_tail(assign); /* Temporary where we store whichever value we swizzle out. */ var = new(base_ir) ir_variable(ir->type, "vec_index_tmp_v", ir_var_temporary); - base_ir->insert_before(var); + list.push_tail(var); + + /* Generate a single comparison condition "mask" for all of the components + * in the vector. + */ + ir_rvalue *const cond_deref = + compare_index_block(&list, index, 0, + orig_deref->array->type->vector_elements, + mem_ctx); /* Generate a conditional move of each vector element to the temp. 
*/ for (i = 0; i < orig_deref->array->type->vector_elements; i++) { - deref = new(base_ir) ir_dereference_variable(index); - condition = new(base_ir) ir_expression(ir_binop_equal, - glsl_type::bool_type, - deref, - new(base_ir) ir_constant(i)); + ir_rvalue *condition_swizzle = + new(base_ir) ir_swizzle(cond_deref->clone(ir, NULL), i, 0, 0, 0, 1); /* Just clone the rest of the deref chain when trying to get at the * underlying variable. */ - swizzle = new(base_ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL), - i, 0, 0, 0, 1); + ir_rvalue *swizzle = + new(base_ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL), + i, 0, 0, 0, 1); deref = new(base_ir) ir_dereference_variable(var); - assign = new(base_ir) ir_assignment(deref, swizzle, condition); - base_ir->insert_before(assign); + assign = new(base_ir) ir_assignment(deref, swizzle, condition_swizzle); + list.push_tail(assign); } + /* Put all of the new instructions in the IR stream before the old + * instruction. + */ + base_ir->insert_before(&list); + this->progress = true; return new(base_ir) ir_dereference_variable(var); } @@ -189,24 +200,29 @@ ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir) assign = new(ir) ir_assignment(deref, ir->rhs, NULL); list.push_tail(assign); + /* Generate a single comparison condition "mask" for all of the components + * in the vector. + */ + ir_rvalue *const cond_deref = + compare_index_block(&list, index, 0, + orig_deref->array->type->vector_elements, + mem_ctx); + /* Generate a conditional move of each vector element to the temp. 
*/ for (i = 0; i < orig_deref->array->type->vector_elements; i++) { - ir_rvalue *condition, *swizzle; + ir_rvalue *condition_swizzle = + new(ir) ir_swizzle(cond_deref->clone(ir, NULL), i, 0, 0, 0, 1); - deref = new(ir) ir_dereference_variable(index); - condition = new(ir) ir_expression(ir_binop_equal, - glsl_type::bool_type, - deref, - new(ir) ir_constant(i)); /* Just clone the rest of the deref chain when trying to get at the * underlying variable. */ - swizzle = new(ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL), - i, 0, 0, 0, 1); + ir_rvalue *swizzle = + new(ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL), + i, 0, 0, 0, 1); deref = new(ir) ir_dereference_variable(var); - assign = new(ir) ir_assignment(swizzle, deref, condition); + assign = new(ir) ir_assignment(swizzle, deref, condition_swizzle); list.push_tail(assign); } From ac6455e9a25f5472c96d580e3d2389f1ed1c0619 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 21 Jul 2011 20:10:34 -0400 Subject: [PATCH 068/600] gallium/softpipe: Don't clobber dest color/alpha before masking. The blend_quad function clobbers the actual render target color/alpha values while applying the destination blend factor, which results in restoring the wrong value during the masking stage for write-disabled channels. 
Reviewed-by: Brian Paul --- src/gallium/drivers/softpipe/sp_quad_blend.c | 185 ++++++++++--------- 1 file changed, 96 insertions(+), 89 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 04bfd14b7c6..82f9785e32a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -240,6 +240,7 @@ blend_quad(struct quad_stage *qs, static const float one[4] = { 1, 1, 1, 1 }; struct softpipe_context *softpipe = qs->softpipe; float source[4][QUAD_SIZE] = { { 0 } }; + float blend_dest[4][QUAD_SIZE]; /* * Compute src/first term RGB @@ -480,79 +481,85 @@ blend_quad(struct quad_stage *qs, assert(0 && "invalid alpha src factor"); } + /* Save the original dest for use in masking */ + VEC4_COPY(blend_dest[0], dest[0]); + VEC4_COPY(blend_dest[1], dest[1]); + VEC4_COPY(blend_dest[2], dest[2]); + VEC4_COPY(blend_dest[3], dest[3]); + /* - * Compute dest/second term RGB + * Compute blend_dest/second term RGB */ switch (softpipe->blend->rt[blend_index].rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(blend_dest[2], blend_dest[2], 
quadColor[3]); /* B * A */ break; case PIPE_BLENDFACTOR_DST_ALPHA: if (has_dst_alpha) { - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */ + VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */ + VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */ } else { - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ } break; case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: if (has_dst_alpha) { const float *alpha = quadColor[3]; float diff[4], temp[4]; - VEC4_SUB(diff, one, dest[3]); + VEC4_SUB(diff, one, blend_dest[3]); VEC4_MIN(temp, alpha, diff); - VEC4_MUL(dest[0], quadColor[0], temp); /* R */ - VEC4_MUL(dest[1], quadColor[1], temp); /* G */ - VEC4_MUL(dest[2], quadColor[2], temp); /* B */ + VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */ + VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */ + VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */ } else { - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ } break; case PIPE_BLENDFACTOR_CONST_COLOR: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */ VEC4_SCALAR(comp, 
softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */ VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ + VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */ } break; case PIPE_BLENDFACTOR_CONST_ALPHA: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */ } break; case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ break; case PIPE_BLENDFACTOR_SRC1_COLOR: case PIPE_BLENDFACTOR_SRC1_ALPHA: @@ -563,45 +570,45 @@ blend_quad(struct quad_stage *qs, { float inv_comp[4]; VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ } break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */ + VEC4_MUL(blend_dest[1], 
blend_dest[1], one_minus_alpha); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */ } break; case PIPE_BLENDFACTOR_INV_DST_ALPHA: if (has_dst_alpha) { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */ + VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ } else { - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ } break; case PIPE_BLENDFACTOR_INV_DST_COLOR: { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */ + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */ } break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: @@ -609,22 +616,22 @@ blend_quad(struct quad_stage *qs, float inv_comp[4]; /* R */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* G */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* B */ 
VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(dest[2], dest[2], inv_comp); + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); } break; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[0], dest[0], inv_comp); - VEC4_MUL(dest[1], dest[1], inv_comp); - VEC4_MUL(dest[2], dest[2], inv_comp); + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); } break; case PIPE_BLENDFACTOR_INV_SRC1_COLOR: @@ -637,29 +644,29 @@ blend_quad(struct quad_stage *qs, } /* - * Compute dest/second term A + * Compute blend_dest/second term A */ switch (softpipe->blend->rt[blend_index].alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_SRC_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */ break; case PIPE_BLENDFACTOR_DST_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_DST_ALPHA: if (has_dst_alpha) { - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */ } else { - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ } break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_CONST_COLOR: /* fall-through */ @@ -667,11 +674,11 @@ blend_quad(struct quad_stage *qs, { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[3], dest[3], comp); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */ } break; case 
PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ + VEC4_COPY(blend_dest[3], zero); /* A */ break; case PIPE_BLENDFACTOR_INV_SRC_COLOR: /* fall-through */ @@ -679,7 +686,7 @@ blend_quad(struct quad_stage *qs, { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */ } break; case PIPE_BLENDFACTOR_INV_DST_COLOR: @@ -687,11 +694,11 @@ blend_quad(struct quad_stage *qs, case PIPE_BLENDFACTOR_INV_DST_ALPHA: if (has_dst_alpha) { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */ + VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */ } else { - VEC4_COPY(dest[3], zero); /* A */ + VEC4_COPY(blend_dest[3], zero); /* A */ } break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: @@ -700,7 +707,7 @@ blend_quad(struct quad_stage *qs, { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[3], dest[3], inv_comp); + VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp); } break; default: @@ -712,29 +719,29 @@ blend_quad(struct quad_stage *qs, */ switch (softpipe->blend->rt[blend_index].rgb_func) { case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_SUB_SAT(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], source[1], 
blend_dest[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ + VEC4_SUB_SAT(quadColor[0], blend_dest[0], source[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], blend_dest[1], source[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], blend_dest[2], source[2]); /* B */ break; case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */ break; default: assert(0 && "invalid rgb blend func"); @@ -745,19 +752,19 @@ blend_quad(struct quad_stage *qs, */ switch (softpipe->blend->rt[blend_index].alpha_func) { case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + VEC4_ADD_SAT(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ + VEC4_SUB_SAT(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ + VEC4_SUB_SAT(quadColor[3], blend_dest[3], source[3]); /* A */ break; case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + VEC4_MIN(quadColor[3], source[3], blend_dest[3]); 
/* A */ break; case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */ break; default: assert(0 && "invalid alpha blend func"); From 4d23c6df81639057f12a604556121aa7b41d921c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 24 Jul 2011 19:11:34 +0200 Subject: [PATCH 069/600] r600g: use file_max instead of file_count to determine reg offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise shaders with skipped inputs/outputs don't work correctly. Signed-off-by: Christian König --- src/gallium/drivers/r600/r600_shader.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 3e21ad1fdc6..494f9370597 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -658,9 +658,9 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + - ctx.info.file_count[TGSI_FILE_INPUT]; + ctx.info.file_max[TGSI_FILE_INPUT] + 1; ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + - ctx.info.file_count[TGSI_FILE_OUTPUT]; + ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; /* Outside the GPR range. This will be translated to one of the * kcache banks later. 
*/ @@ -668,7 +668,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + - ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; From 4c84acc86fce5eda0aabcb8aa362fd6b5e6a28f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 25 Jul 2011 01:32:39 +0200 Subject: [PATCH 070/600] g3dvl: remove unused vs output from create_ref_vert_shader The position of the quad vertex is calculated in calc_position, so we don't need the output here any more. --- src/gallium/auxiliary/vl/vl_mc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index e5ae0f72c4c..0b3723c9792 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -105,7 +105,7 @@ create_ref_vert_shader(struct vl_mc *r) struct ureg_src mv_scale; struct ureg_src vmv[2]; struct ureg_dst t_vpos; - struct ureg_dst o_vpos, o_vmv[2]; + struct ureg_dst o_vmv[2]; unsigned i; shader = ureg_create(TGSI_PROCESSOR_VERTEX); @@ -120,9 +120,6 @@ create_ref_vert_shader(struct vl_mc *r) (float)MACROBLOCK_HEIGHT / r->buffer_height) ); - /* XXX The position is not written, which may lead to undefined rendering. - * XXX This is a serious bug. */ - o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); From 42cdf4074e0f7d561b03a86255fa8f916f906bf6 Mon Sep 17 00:00:00 2001 From: Benjamin Franzke Date: Mon, 25 Jul 2011 09:37:02 +0200 Subject: [PATCH 071/600] configure: Move gbm before egl in SRC_DIRS egl_dri2 built into libEGL depends on libgbm. 
Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39515 --- configure.ac | 70 ++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/configure.ac b/configure.ac index 86ba87b39e8..0ea264ef042 100644 --- a/configure.ac +++ b/configure.ac @@ -1325,6 +1325,41 @@ AC_SUBST([OSMESA_MESA_DEPS]) AC_SUBST([OSMESA_PC_REQ]) AC_SUBST([OSMESA_PC_LIB_PRIV]) +dnl +dnl gbm configuration +dnl +if test "x$enable_gbm" = xauto; then + case "$with_egl_platforms" in + *drm*) + enable_gbm=yes ;; + *) + enable_gbm=no ;; + esac +fi +if test "x$enable_gbm" = xyes; then + SRC_DIRS="$SRC_DIRS gbm" + GBM_BACKEND_DIRS="" + + PKG_CHECK_MODULES([LIBUDEV], [libudev], [], + AC_MSG_ERROR([gbm needs udev])) + GBM_LIB_DEPS="$DLOPEN_LIBS $LIBUDEV_LIBS" + + if test "x$enable_dri" = xyes; then + GBM_BACKEND_DIRS="$GBM_BACKEND_DIRS dri" + if test "$SHARED_GLAPI" -eq 0; then + AC_MSG_ERROR([gbm_dri requires --enable-shared-glapi]) + fi + fi +fi +AC_SUBST([GBM_LIB_DEPS]) +AC_SUBST([GBM_BACKEND_DIRS]) +GBM_PC_REQ_PRIV="libudev" +GBM_PC_LIB_PRIV="$DLOPEN_LIBS" +GBM_PC_CFLAGS= +AC_SUBST([GBM_PC_REQ_PRIV]) +AC_SUBST([GBM_PC_LIB_PRIV]) +AC_SUBST([GBM_PC_CFLAGS]) + dnl dnl EGL configuration dnl @@ -1366,41 +1401,6 @@ fi AC_SUBST([EGL_LIB_DEPS]) AC_SUBST([EGL_DRIVERS_DIRS]) -dnl -dnl gbm configuration -dnl -if test "x$enable_gbm" = xauto; then - case "$with_egl_platforms" in - *drm*) - enable_gbm=yes ;; - *) - enable_gbm=no ;; - esac -fi -if test "x$enable_gbm" = xyes; then - SRC_DIRS="$SRC_DIRS gbm" - GBM_BACKEND_DIRS="" - - PKG_CHECK_MODULES([LIBUDEV], [libudev], [], - AC_MSG_ERROR([gbm needs udev])) - GBM_LIB_DEPS="$DLOPEN_LIBS $LIBUDEV_LIBS" - - if test "x$enable_dri" = xyes; then - GBM_BACKEND_DIRS="$GBM_BACKEND_DIRS dri" - if test "$SHARED_GLAPI" -eq 0; then - AC_MSG_ERROR([gbm_dri requires --enable-shared-glapi]) - fi - fi -fi -AC_SUBST([GBM_LIB_DEPS]) -AC_SUBST([GBM_BACKEND_DIRS]) -GBM_PC_REQ_PRIV="libudev" -GBM_PC_LIB_PRIV="$DLOPEN_LIBS" 
-GBM_PC_CFLAGS= -AC_SUBST([GBM_PC_REQ_PRIV]) -AC_SUBST([GBM_PC_LIB_PRIV]) -AC_SUBST([GBM_PC_CFLAGS]) - dnl dnl EGL Gallium configuration dnl From 4f90b89961ea2795f274611266b649803a058026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 25 Jul 2011 02:58:31 +0200 Subject: [PATCH 072/600] gallium: change formats merged with pipe-video to type "other" Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=39276 --- src/gallium/auxiliary/util/u_format.csv | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 347e2beb8dd..a3d2aae62c8 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -260,10 +260,10 @@ PIPE_FORMAT_R10G10B10X2_USCALED , plain, 1, 1, u10 , u10 , u10 , x2 , xyz1, r # A.k.a. D3DDECLTYPE_DEC3N PIPE_FORMAT_R10G10B10X2_SNORM , plain, 1, 1, sn10, sn10, sn10 , x2 , xyz1, rgb -PIPE_FORMAT_YV12 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_YV16 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_IYUV , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_NV12 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_NV21 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_IA44 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_AI44 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_YV12 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_YV16 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_IYUV , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_NV12 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_NV21 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_IA44 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_AI44 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv From 7746b7d4bf48b75dd273510e7a6ad6405c91b8bb Mon Sep 17 00:00:00 2001 From: Emeric Date: Mon, 18 Jul 
2011 15:17:25 +0000 Subject: [PATCH 073/600] vdpau: enable mpeg1 hw decoding, using the exact same code path as mpeg2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=39307 Signed-off-by: Christian König --- src/gallium/state_trackers/vdpau/decode.c | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c index 269c7a4baf8..96542f874d9 100644 --- a/src/gallium/state_trackers/vdpau/decode.c +++ b/src/gallium/state_trackers/vdpau/decode.c @@ -161,12 +161,12 @@ vlVdpDecoderGetParameters(VdpDecoder decoder, } static VdpStatus -vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder, - struct pipe_video_decode_buffer *buffer, - struct pipe_video_buffer *target, - VdpPictureInfoMPEG1Or2 *picture_info, - uint32_t bitstream_buffer_count, - VdpBitstreamBuffer const *bitstream_buffers) +vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder, + struct pipe_video_decode_buffer *buffer, + struct pipe_video_buffer *target, + VdpPictureInfoMPEG1Or2 *picture_info, + uint32_t bitstream_buffer_count, + VdpBitstreamBuffer const *bitstream_buffers) { struct pipe_mpeg12_picture_desc picture; struct pipe_video_buffer *ref_frames[2]; @@ -254,17 +254,18 @@ vlVdpDecoderRender(VdpDecoder decoder, // TODO: Recreate decoder with correct chroma return VDP_STATUS_INVALID_CHROMA_TYPE; - // TODO: Right now only mpeg2 is supported. + // TODO: Right now only mpeg 1 & 2 is supported. 
switch (vldecoder->decoder->profile) { + case PIPE_VIDEO_PROFILE_MPEG1: case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE: case PIPE_VIDEO_PROFILE_MPEG2_MAIN: ++vldecoder->cur_buffer; vldecoder->cur_buffer %= VL_NUM_DECODE_BUFFERS; - return vlVdpDecoderRenderMpeg2(vldecoder->decoder, - vldecoder->buffer[vldecoder->cur_buffer], - vlsurf->video_buffer, - (VdpPictureInfoMPEG1Or2 *)picture_info, - bitstream_buffer_count,bitstream_buffers); + return vlVdpDecoderRenderMpeg12(vldecoder->decoder, + vldecoder->buffer[vldecoder->cur_buffer], + vlsurf->video_buffer, + (VdpPictureInfoMPEG1Or2 *)picture_info, + bitstream_buffer_count,bitstream_buffers); break; default: From 156cef0fbacf242e8fc67e39ab964e5f8f3739cb Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 21 Jul 2011 21:17:10 -0700 Subject: [PATCH 074/600] i965/fs: Clear result before visiting shadow comparitor and LOD info. Commit 53c89c67f33639afef951e178f93f4e29acc5d53 ("i965: Avoid generating MOVs for assignments of expressions.") added the line "this->result = reg_undef" all over the code. Unfortunately, since Eric developed his patch before I landed Ivybridge support, he missed adding it to fs_visitor::emit_texture_gen7() after rebasing. Furthermore, since I developed TXD support before Eric's patch, I neglected to add it to the gradient handling when I rebased. Neglecting to set this causes the visitor to use this->result as storage rather than generating a new temporary. These missing statements resulted in the same register being used to store several different values. Fixes the following piglit tests on Ivybridge: - glsl-fs-shadow2dproj.shader_test - glsl-fs-shadow2dproj-bias.shader_test NOTE: This is a candidate for the 7.11 branch. 
Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cbe5cf428c5..9632aae64b0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -603,9 +603,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; } else if (ir->op == ir_txd) { + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -786,9 +788,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: { + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -850,6 +854,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } if (ir->shadow_comparitor && ir->op != ir_txd) { + this->result = reg_undef; ir->shadow_comparitor->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -860,11 +865,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_tex: break; case ir_txb: + this->result = reg_undef; ir->lod_info.bias->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; break; case ir_txl: + this->result = reg_undef; ir->lod_info.lod->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -873,9 +880,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, if (c->dispatch_width == 
16) fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -1070,6 +1079,7 @@ fs_visitor::visit(ir_texture *ir) if (hw_compare_supported) { inst->shadow_compare = true; } else { + this->result = reg_undef; ir->shadow_comparitor->accept(this); fs_reg ref = this->result; From 572f6318954f31fcf3d396ac5df8e9eff3f37c74 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 12 May 2011 04:02:32 -0700 Subject: [PATCH 075/600] i965/gen7: Fix shadow sampling in the old brw_wm_emit backend. On Ivybridge, the shadow comparitor goes in the first slot, rather than at the end. It's not necessary to send u, v, and r. Fixes tests texturing/texdepth and glean/fbo. NOTE: This is a candidate for the 7.11 branch. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_wm_emit.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index f61757a8cac..6ea4a7d6e50 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1094,9 +1094,16 @@ void emit_tex(struct brw_wm_compile *c, if (intel->gen < 5 && c->dispatch_width == 8) nr_texcoords = 3; - /* For shadow comparisons, we have to supply u,v,r. */ - if (shadow) - nr_texcoords = 3; + if (shadow) { + if (intel->gen < 7) { + /* For shadow comparisons, we have to supply u,v,r. */ + nr_texcoords = 3; + } else { + /* On Ivybridge, the shadow comparitor comes first. Just load it. */ + brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); + cur_mrf += mrf_per_channel; + } + } /* Emit the texcoords. */ for (i = 0; i < nr_texcoords; i++) { @@ -1113,7 +1120,7 @@ void emit_tex(struct brw_wm_compile *c, } /* Fill in the shadow comparison reference value. 
*/ - if (shadow) { + if (shadow && intel->gen < 7) { if (intel->gen >= 5) { /* Fill in the cube map array index value. */ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); From d92463d5dc42aca09a54588c322fc60582cf9131 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 22 Jul 2011 14:05:52 -0700 Subject: [PATCH 076/600] i965: vs optimization fix: Check val.{negate,abs} in accumulator_contains() When emitting a MAC instruction in a vertex shader, brw_vs_emit() calls accumulator_contains() to determine whether the accumulator already contains the appropriate addend; if it does, then we can avoid emitting an unnecessary MOV instruction. However, accumulator_contains() wasn't checking the val.negate or val.abs flags. As a result, if the desired value was the negation, or the absolute value, of what was already in the accumulator, we would generate an incorrect shader. Fixes piglit test vs-refract-vec4-vec4-float. Tested on Gen5 and Gen6. Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index d8cb0f7cb69..674a994bace 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1821,6 +1821,9 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) if (val.address_mode != BRW_ADDRESS_DIRECT) return GL_FALSE; + if (val.negate || val.abs) + return GL_FALSE; + switch (prev_insn->header.opcode) { case BRW_OPCODE_MOV: case BRW_OPCODE_MAC: From 185868c9c2e6a31a7313df2dbe29490547b65f61 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 11:50:27 -0700 Subject: [PATCH 077/600] i965: Emit texture cache flushes on gen6 along with render cache flushes. It turns out that internally the texture cache gets flushed in a couple of cases, particularly around 2D operations mixed with 3D. 
In almost all cases one of those happens between rendering to an FBO-attached texture and rendering from that texture. However, as of the next patch, glean tfbo (and the new fbo-flushing-2 test) would manage to get stale texture values because one of those flushes didn't occur. The intention of this code was always to get the render cache cleared and ready to be used from the sampler cache (and it does on <= gen4), so this just catches gen5 up. This patch was also tested to fix fbo-flushing on gen7. --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 97cc219ce6d..db4343be10c 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -390,6 +390,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel) OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_TC_FLUSH | PIPE_CONTROL_NO_WRITE); OUT_BATCH(0); /* write address */ OUT_BATCH(0); /* write data */ From 808024689247561d3de225856fb6ef17430fd39e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 21 Jul 2011 09:15:05 -0700 Subject: [PATCH 078/600] meta: Also save/restore clip planes for GLSL. Fixes user-clip on 965 with 3D clears enabled. I created a separate flag because I wanted to avoid the overhead of the matrix operations in this path. 
Reviewed-by: Brian Paul --- src/mesa/drivers/common/meta.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index fa78674e4eb..26c89519679 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -90,13 +90,14 @@ #define META_SCISSOR 0x100 #define META_SHADER 0x200 #define META_STENCIL_TEST 0x400 -#define META_TRANSFORM 0x800 /**< modelview, projection, clip planes */ +#define META_TRANSFORM 0x800 /**< modelview/projection matrix state */ #define META_TEXTURE 0x1000 #define META_VERTEX 0x2000 #define META_VIEWPORT 0x4000 #define META_CLAMP_FRAGMENT_COLOR 0x8000 #define META_CLAMP_VERTEX_COLOR 0x10000 #define META_CONDITIONAL_RENDER 0x20000 +#define META_CLIP 0x40000 /*@}*/ @@ -165,6 +166,8 @@ struct save_state GLfloat ModelviewMatrix[16]; GLfloat ProjectionMatrix[16]; GLfloat TextureMatrix[16]; + + /** META_CLIP */ GLbitfield ClipPlanesEnabled; /** META_TEXTURE */ @@ -547,6 +550,9 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_Ortho(0.0, ctx->DrawBuffer->Width, 0.0, ctx->DrawBuffer->Height, -1.0, 1.0); + } + + if (state & META_CLIP) { save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled; if (ctx->Transform.ClipPlanesEnabled) { GLuint i; @@ -846,7 +852,9 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_LoadMatrixf(save->ProjectionMatrix); _mesa_MatrixMode(save->MatrixMode); + } + if (state & META_CLIP) { if (save->ClipPlanesEnabled) { GLuint i; for (i = 0; i < ctx->Const.MaxClipPlanes; i++) { @@ -1669,6 +1677,7 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers) META_STENCIL_TEST | META_VERTEX | META_VIEWPORT | + META_CLIP | META_CLAMP_FRAGMENT_COLOR); if (!(buffers & BUFFER_BITS_COLOR)) { @@ -1783,6 +1792,7 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY, META_SHADER | META_TEXTURE | META_TRANSFORM | + META_CLIP | META_VERTEX | META_VIEWPORT)); @@ -2104,6 +2114,7 @@ 
_mesa_meta_DrawPixels(struct gl_context *ctx, META_SHADER | META_TEXTURE | META_TRANSFORM | + META_CLIP | META_VERTEX | META_VIEWPORT | META_CLAMP_FRAGMENT_COLOR | @@ -2313,6 +2324,7 @@ _mesa_meta_Bitmap(struct gl_context *ctx, META_SHADER | META_TEXTURE | META_TRANSFORM | + META_CLIP | META_VERTEX | META_VIEWPORT)); From a0e5affb22da50aeb30262f5ba0912b059d858ea Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 19 May 2011 11:02:14 -0700 Subject: [PATCH 079/600] i965: Use 3D clears on gen6+ to avoid inter-ring synchronization. Improves firefox-talos-gfx around 5%. --- src/mesa/drivers/dri/intel/intel_clear.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 81c062fba53..76d33f9b37e 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -116,13 +116,13 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } /* HW color buffers (front, back, aux, generic FBO, etc) */ - if (colorMask == ~0) { + if (intel->gen < 6 && colorMask == ~0) { /* clear all R,G,B,A */ blit_mask |= (mask & BUFFER_BITS_COLOR); } else { /* glColorMask in effect */ - tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT)); + tri_mask |= (mask & BUFFER_BITS_COLOR); } /* Make sure we have up to date buffers before we start looking at From 818db3848bfaa002d0e7cf6b9b615a31eb82ba25 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 10:56:10 -0700 Subject: [PATCH 080/600] i965: Fix many of the trivial WebGL demos that broke due to IB optimization. The index buffer state emit only occurred if there was an IB in place and we were in either a new batch or a new IB state. But because we only flagged new IB state if IB state changed from the last IB state we calculated, we could simply never emit IB state after batchbuffer wraps if the first draw didn't use the IB and we didn't actually change the IB. 
Fixes piglit glx-multi-context-ib-1. --- src/mesa/drivers/dri/i965/brw_vtbl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 55dbd4fa8b0..40360b23fff 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -213,6 +213,7 @@ static void brw_new_batch( struct intel_context *intel ) brw->state_batch_count = 0; brw->vb.nr_current_buffers = 0; + brw->ib.type = -1; /* Mark that the current program cache BO has been used by the GPU. * It will be reallocated if we need to put new programs in for the From 28a336dc38c478b809544e7404c4d1fddd873333 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Jul 2011 18:58:30 +0200 Subject: [PATCH 081/600] winsys/radeon: simplify how value queries work This drops the get_value query and adds a function query_info, which returns all the values in one nice structure. --- src/gallium/drivers/r300/r300_chipset.c | 6 +- src/gallium/drivers/r300/r300_chipset.h | 8 +- src/gallium/drivers/r300/r300_context.c | 20 +-- src/gallium/drivers/r300/r300_emit.c | 11 +- src/gallium/drivers/r300/r300_query.c | 4 +- src/gallium/drivers/r300/r300_screen.c | 11 +- src/gallium/drivers/r300/r300_screen.h | 11 +- src/gallium/drivers/r300/r300_texture_desc.c | 4 +- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 4 +- .../winsys/radeon/drm/radeon_drm_winsys.c | 154 ++++++++---------- .../winsys/radeon/drm/radeon_drm_winsys.h | 12 +- src/gallium/winsys/radeon/drm/radeon_winsys.h | 61 +++---- 12 files changed, 134 insertions(+), 172 deletions(-) diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 571986c3011..80148b80afb 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -31,9 +31,9 @@ * Radeons. */ /* Parse a PCI ID and fill an r300_capabilities struct with information. 
*/ -void r300_parse_chipset(struct r300_capabilities* caps) +void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps) { - switch (caps->pci_id) { + switch (pci_id) { #define CHIPSET(pci_id, name, chipfamily) \ case pci_id: \ caps->family = CHIP_FAMILY_##chipfamily; \ @@ -43,7 +43,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) default: fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...", - caps->pci_id); + pci_id); abort(); } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 4df6b5b6292..f96cdaf2580 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -43,16 +43,10 @@ enum r300_zmask_compression { /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { - /* PCI ID */ - uint32_t pci_id; /* Chipset family */ int family; /* The number of vertex floating-point units */ unsigned num_vert_fpus; - /* The number of fragment pipes */ - unsigned num_frag_pipes; - /* The number of z pipes */ - unsigned num_z_pipes; /* The number of texture units. 
*/ unsigned num_tex_units; /* Whether or not TCL is physically present */ @@ -121,6 +115,6 @@ enum { CHIP_FAMILY_RV570 }; -void r300_parse_chipset(struct r300_capabilities* caps); +void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps); #endif /* R300_CHIPSET_H */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d94ac74f0e5..2b3329e9f86 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -173,7 +173,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_6_0 = r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0); + boolean drm_2_6_0 = r300->screen->info.drm_minor >= 6; /* Create the actual atom list. * @@ -380,7 +380,7 @@ static void r300_init_states(struct pipe_context *pipe) if (r300->screen->caps.is_r500 || (r300->screen->caps.is_rv350 && - r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0))) { + r300->screen->info.drm_minor >= 6)) { OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); } END_CB; @@ -520,15 +520,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" "r300: GART size: %d MB, VRAM size: %d MB\n" "r300: AA compression RAM: %s, Z compression RAM: %s, HiZ RAM: %s\n", - rws->get_value(rws, RADEON_VID_DRM_MAJOR), - rws->get_value(rws, RADEON_VID_DRM_MINOR), - rws->get_value(rws, RADEON_VID_DRM_PATCHLEVEL), + r300->screen->info.drm_major, + r300->screen->info.drm_minor, + r300->screen->info.drm_patchlevel, screen->get_name(screen), - rws->get_value(rws, RADEON_VID_PCI_ID), - rws->get_value(rws, RADEON_VID_R300_GB_PIPES), - rws->get_value(rws, RADEON_VID_R300_Z_PIPES), - rws->get_value(rws, RADEON_VID_GART_SIZE) >> 20, - rws->get_value(rws, RADEON_VID_VRAM_SIZE) >> 20, + r300->screen->info.pci_id, + 
r300->screen->info.r300_num_gb_pipes, + r300->screen->info.r300_num_z_pipes, + r300->screen->info.gart_size >> 20, + r300->screen->info.vram_size >> 20, "YES", /* XXX really? */ r300->screen->caps.zmask_ram ? "YES" : "NO", r300->screen->caps.hiz_ram ? "YES" : "NO"); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index d214af4cd5b..502aed3a20c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -574,11 +574,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; + uint32_t gb_pipes = r300->screen->info.r300_num_gb_pipes; CS_LOCALS(r300); - assert(caps->num_frag_pipes); + assert(gb_pipes); - BEGIN_CS(6 * caps->num_frag_pipes + 2); + BEGIN_CS(6 * gb_pipes + 2); /* I'm not so sure I like this switch, but it's hard to be elegant * when there's so many special cases... * @@ -587,7 +588,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, * 4-byte offset for each pipe. RV380 and older are special; they have * only two pipes, and the second pipe's enable is on bit 3, not bit 1, * so there's a chipset cap for that. 
*/ - switch (caps->num_frag_pipes) { + switch (gb_pipes) { case 4: /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); @@ -613,7 +614,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" - " pixel pipes!\n", caps->num_frag_pipes); + " pixel pipes!\n", gb_pipes); abort(); } @@ -663,7 +664,7 @@ void r300_emit_query_end(struct r300_context* r300) return; if (caps->family == CHIP_FAMILY_RV530) { - if (caps->num_z_pipes == 2) + if (r300->screen->info.r300_num_z_pipes == 2) rv530_emit_query_end_double_z(r300, query); else rv530_emit_query_end_single_z(r300, query); diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 782f041e926..000114129bf 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -49,9 +49,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->buffer_size = 4096; if (r300screen->caps.family == CHIP_FAMILY_RV530) - q->num_pipes = r300screen->caps.num_z_pipes; + q->num_pipes = r300screen->info.r300_num_z_pipes; else - q->num_pipes = r300screen->caps.num_frag_pipes; + q->num_pipes = r300screen->info.r300_num_gb_pipes; insert_at_tail(&r300->query_list, q); diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index c8df45fb3e7..d9378308ad0 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -327,9 +327,8 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, unsigned sample_count, unsigned usage) { - struct radeon_winsys *rws = r300_screen(screen)->rws; uint32_t retval = 0; - boolean drm_2_8_0 = rws->get_value(rws, RADEON_VID_DRM_2_8_0); + boolean drm_2_8_0 = r300_screen(screen)->info.drm_minor >= 8; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; boolean is_color2101010 = format == 
PIPE_FORMAT_R10G10B10A2_UNORM || @@ -497,19 +496,17 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws) return NULL; } - r300screen->caps.pci_id = rws->get_value(rws, RADEON_VID_PCI_ID); - r300screen->caps.num_frag_pipes = rws->get_value(rws, RADEON_VID_R300_GB_PIPES); - r300screen->caps.num_z_pipes = rws->get_value(rws, RADEON_VID_R300_Z_PIPES); + rws->query_info(rws, &r300screen->info); r300_init_debug(r300screen); - r300_parse_chipset(&r300screen->caps); + r300_parse_chipset(r300screen->info.pci_id, &r300screen->caps); if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK)) r300screen->caps.zmask_ram = 0; if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) r300screen->caps.hiz_ram = 0; - if (!rws->get_value(rws, RADEON_VID_DRM_2_8_0)) + if (r300screen->info.drm_minor < 8) r300screen->caps.has_us_format = FALSE; pipe_mutex_init(r300screen->num_contexts_mutex); diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index e5c53bf3500..82b2068e7a0 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -24,23 +24,20 @@ #ifndef R300_SCREEN_H #define R300_SCREEN_H -#include "pipe/p_screen.h" - #include "r300_chipset.h" - +#include "../../winsys/radeon/drm/radeon_winsys.h" +#include "pipe/p_screen.h" #include "util/u_slab.h" - #include -struct radeon_winsys; - struct r300_screen { /* Parent class */ struct pipe_screen screen; struct radeon_winsys *rws; - /* Chipset capabilities */ + /* Chipset info and capabilities. */ + struct radeon_info info; struct r300_capabilities caps; /* Memory pools. 
*/ diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index da5778be65e..fe4f8dd5679 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -360,9 +360,9 @@ static void r300_setup_hyperz_properties(struct r300_screen *screen, unsigned i, pipes; if (screen->caps.family == CHIP_FAMILY_RV530) { - pipes = screen->caps.num_z_pipes; + pipes = screen->info.r300_num_z_pipes; } else { - pipes = screen->caps.num_frag_pipes; + pipes = screen->info.r300_num_gb_pipes; } for (i = 0; i <= tex->b.b.b.last_level; i++) { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 0139de1973a..f0f4a70be3f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -308,8 +308,8 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - return cs->csc->used_gart < cs->ws->gart_size * 0.8 && - cs->csc->used_vram < cs->ws->vram_size * 0.8; + return cs->csc->used_gart < cs->ws->info.gart_size * 0.8 && + cs->csc->used_vram < cs->ws->info.vram_size * 0.8; } static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 0474b381ade..473f388d121 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -103,17 +103,31 @@ static boolean radeon_set_fd_access(struct radeon_drm_cs *applier, return FALSE; } +static boolean radeon_get_drm_value(int fd, unsigned request, + const char *name, uint32_t *out) +{ + struct drm_radeon_info info = {0}; + int retval; + + info.value = (unsigned long)out; + info.request = request; + + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (retval) { + fprintf(stderr, "%s: 
Failed to get %s, error number %d\n", + __func__, name, retval); + return FALSE; + } + return TRUE; +} + /* Helper function to do the ioctls needed for setup and init. */ -static void do_ioctls(struct radeon_drm_winsys *winsys) +static boolean do_winsys_init(struct radeon_drm_winsys *ws) { struct drm_radeon_gem_info gem_info = {0}; - struct drm_radeon_info info = {0}; - int target = 0; int retval; drmVersionPtr version; - info.value = (unsigned long)&target; - /* We do things in a specific order here. * * DRM version first. We need to be sure we're running on a KMS chipset. @@ -123,71 +137,76 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) * for all Radeons. If this fails, we probably got handed an FD for some * non-Radeon card. * + * The GEM info is actually bogus on the kernel side, as well as our side + * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because + * we don't actually use the info for anything yet. + * * The GB and Z pipe requests should always succeed, but they might not * return sensical values for all chipsets, but that's alright because * the pipe drivers already know that. - * - * The GEM info is actually bogus on the kernel side, as well as our side - * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because - * we don't actually use the info for anything yet. */ + */ - version = drmGetVersion(winsys->fd); + /* Get DRM version.
*/ + version = drmGetVersion(ws->fd); if (version->version_major != 2 || version->version_minor < 3) { fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " - "only compatible with 2.3.x (kernel 2.6.34) and later.\n", + "only compatible with 2.3.x (kernel 2.6.34) or later.\n", __FUNCTION__, version->version_major, version->version_minor, version->version_patchlevel); drmFreeVersion(version); - exit(1); + return FALSE; } - winsys->drm_major = version->version_major; - winsys->drm_minor = version->version_minor; - winsys->drm_patchlevel = version->version_patchlevel; + ws->info.drm_major = version->version_major; + ws->info.drm_minor = version->version_minor; + ws->info.drm_patchlevel = version->version_patchlevel; + drmFreeVersion(version); - info.request = RADEON_INFO_DEVICE_ID; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get PCI ID, " - "error number %d\n", __FUNCTION__, retval); - exit(1); + /* Get PCI ID. */ + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID", + &ws->info.pci_id)) + return FALSE; + + /* Check PCI ID. 
*/ + switch (ws->info.pci_id) { +#define CHIPSET(pci_id, name, family) case pci_id: +#include "pci_ids/r300_pci_ids.h" +#undef CHIPSET + break; + + default: + fprintf(stderr, "radeon: Invalid PCI ID.\n"); + return FALSE; } - winsys->pci_id = target; - info.request = RADEON_INFO_NUM_GB_PIPES; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get GB pipe count, " - "error number %d\n", __FUNCTION__, retval); - exit(1); - } - winsys->gb_pipes = target; - - info.request = RADEON_INFO_NUM_Z_PIPES; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get Z pipe count, " - "error number %d\n", __FUNCTION__, retval); - exit(1); - } - winsys->z_pipes = target; - - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO, + /* Get GEM info. */ + retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); if (retval) { fprintf(stderr, "%s: Failed to get MM info, error number %d\n", __FUNCTION__, retval); - exit(1); + return FALSE; } - winsys->gart_size = gem_info.gart_size; - winsys->vram_size = gem_info.vram_size; + ws->info.gart_size = gem_info.gart_size; + ws->info.vram_size = gem_info.vram_size; - drmFreeVersion(version); + ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); - winsys->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + /* Generation-specific queries. 
*/ + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES, + "GB pipe count", + &ws->info.r300_num_gb_pipes)) + return FALSE; + + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES, + "Z pipe count", + &ws->info.r300_num_z_pipes)) + return FALSE; + + return TRUE; } static void radeon_winsys_destroy(struct radeon_winsys *rws) @@ -202,34 +221,10 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws) FREE(rws); } -static uint32_t radeon_get_value(struct radeon_winsys *rws, - enum radeon_value_id id) +static void radeon_query_info(struct radeon_winsys *rws, + struct radeon_info *info) { - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws; - - switch(id) { - case RADEON_VID_PCI_ID: - return ws->pci_id; - case RADEON_VID_R300_GB_PIPES: - return ws->gb_pipes; - case RADEON_VID_R300_Z_PIPES: - return ws->z_pipes; - case RADEON_VID_GART_SIZE: - return ws->gart_size; - case RADEON_VID_VRAM_SIZE: - return ws->vram_size; - case RADEON_VID_DRM_MAJOR: - return ws->drm_major; - case RADEON_VID_DRM_MINOR: - return ws->drm_minor; - case RADEON_VID_DRM_PATCHLEVEL: - return ws->drm_patchlevel; - case RADEON_VID_DRM_2_6_0: - return ws->drm_major*100 + ws->drm_minor >= 206; - case RADEON_VID_DRM_2_8_0: - return ws->drm_major*100 + ws->drm_minor >= 208; - } - return 0; + *info = ((struct radeon_drm_winsys *)rws)->info; } static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, @@ -268,16 +263,9 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) } ws->fd = fd; - do_ioctls(ws); - switch (ws->pci_id) { -#define CHIPSET(pci_id, name, family) case pci_id: -#include "pci_ids/r300_pci_ids.h" -#undef CHIPSET - break; - default: - goto fail; - } + if (!do_winsys_init(ws)) + goto fail; /* Create managers. */ ws->kman = radeon_bomgr_create(ws); @@ -289,7 +277,7 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) /* Set functions. 
*/ ws->base.destroy = radeon_winsys_destroy; - ws->base.get_value = radeon_get_value; + ws->base.query_info = radeon_query_info; ws->base.cs_request_feature = radeon_cs_request_feature; radeon_bomgr_init_functions(ws); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index d5186bc4d17..347e1f1d11a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -31,7 +31,6 @@ #define RADEON_DRM_WINSYS_H #include "radeon_winsys.h" - #include "os/os_thread.h" struct radeon_drm_winsys { @@ -40,20 +39,13 @@ struct radeon_drm_winsys { int fd; /* DRM file descriptor */ int num_cs; /* The number of command streams created. */ + struct radeon_info info; + struct pb_manager *kman; struct pb_manager *cman; - uint32_t pci_id; /* PCI ID */ - uint32_t gb_pipes; /* GB pipe count */ - uint32_t z_pipes; /* Z pipe count (rv530 only) */ - uint32_t gart_size; /* GART size. */ - uint32_t vram_size; /* VRAM size. */ uint32_t num_cpus; /* Number of CPUs. */ - unsigned drm_major; - unsigned drm_minor; - unsigned drm_patchlevel; - struct radeon_drm_cs *hyperz_owner; pipe_mutex hyperz_owner_mutex; struct radeon_drm_cs *cmask_owner; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 3a64e4abc35..915a9c5bad1 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -26,6 +26,20 @@ /* The public winsys interface header for the radeon driver. */ +/* R300 features in DRM. + * + * 2.6.0: + * - Hyper-Z + * - GB_Z_PEQ_CONFIG on rv350->r4xx + * - R500 FG_ALPHA_VALUE + * + * 2.8.0: + * - R500 US_FORMAT regs + * - R500 ARGB2101010 colorbuffer + * - CMask and AA regs + * - R16F/RG16F + */ + #include "pipebuffer/pb_bufmgr.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -55,38 +69,17 @@ struct radeon_winsys_cs { uint32_t *buf; /* The command buffer. 
*/ }; -enum radeon_value_id { - RADEON_VID_PCI_ID, - RADEON_VID_R300_GB_PIPES, - RADEON_VID_R300_Z_PIPES, - RADEON_VID_GART_SIZE, - RADEON_VID_VRAM_SIZE, - RADEON_VID_DRM_MAJOR, - RADEON_VID_DRM_MINOR, - RADEON_VID_DRM_PATCHLEVEL, +struct radeon_info { + uint32_t pci_id; + uint32_t gart_size; + uint32_t vram_size; - /* These should probably go away: */ + uint32_t drm_major; /* version */ + uint32_t drm_minor; + uint32_t drm_patchlevel; - /* R300 features: - * - Hyper-Z - * - GB_Z_PEQ_CONFIG on rv350->r4xx - * - R500 FG_ALPHA_VALUE - * - * R600 features: - * - TBD - */ - RADEON_VID_DRM_2_6_0, - - /* R300 features: - * - R500 US_FORMAT regs - * - R500 ARGB2101010 colorbuffer - * - CMask and AA regs - * - R16F/RG16F - * - * R600 features: - * - TBD - */ - RADEON_VID_DRM_2_8_0, + uint32_t r300_num_gb_pipes; + uint32_t r300_num_z_pipes; }; enum radeon_feature_id { @@ -103,13 +96,13 @@ struct radeon_winsys { void (*destroy)(struct radeon_winsys *ws); /** - * Query a system value from a winsys. + * Query an info structure from winsys. * * \param ws The winsys this function is called from. - * \param vid One of the RADEON_VID_* enums. + * \param info Return structure */ - uint32_t (*get_value)(struct radeon_winsys *ws, - enum radeon_value_id vid); + void (*query_info)(struct radeon_winsys *ws, + struct radeon_info *info); /************************************************************************** * Buffer management. Buffer attributes are mostly fixed over its lifetime. 
From ce9daf6f0bda857c9ee5d021cfb444db6376bfe7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Jul 2011 19:14:23 +0200 Subject: [PATCH 082/600] winsys/radeon: add R300 infix to winsys feature names --- src/gallium/drivers/r300/r300_blit.c | 2 +- src/gallium/drivers/r300/r300_context.c | 2 +- src/gallium/drivers/r300/r300_flush.c | 2 +- src/gallium/targets/dri-r300/target.c | 1 - src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 4 ++-- src/gallium/winsys/radeon/drm/radeon_winsys.h | 4 ++-- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 388ebcdbf32..db97e496e19 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -247,7 +247,7 @@ static void r300_clear(struct pipe_context* pipe, if (!r300->hyperz_enabled) { r300->hyperz_enabled = r300->rws->cs_request_feature(r300->cs, - RADEON_FID_HYPERZ_RAM_ACCESS, + RADEON_FID_R300_HYPERZ_ACCESS, TRUE); if (r300->hyperz_enabled) { /* Need to emit HyperZ buffer regs for the first time. */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 2b3329e9f86..5c222588e47 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -99,7 +99,7 @@ static void r300_destroy_context(struct pipe_context* context) struct r300_context* r300 = r300_context(context); if (r300->cs && r300->hyperz_enabled) { - r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, FALSE); + r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); } if (r300->blitter) diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 34f5419a864..4c6beea5a55 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -121,7 +121,7 @@ void r300_flush(struct pipe_context *pipe, } /* Release HyperZ. 
*/ - r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, + r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); } r300->num_z_clears = 0; diff --git a/src/gallium/targets/dri-r300/target.c b/src/gallium/targets/dri-r300/target.c index b48bcad3710..9b6d816fb62 100644 --- a/src/gallium/targets/dri-r300/target.c +++ b/src/gallium/targets/dri-r300/target.c @@ -1,4 +1,3 @@ - #include "target-helpers/inline_debug_helper.h" #include "state_tracker/drm_driver.h" #include "radeon/drm/radeon_drm_public.h" diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 473f388d121..5983e86c570 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -234,7 +234,7 @@ static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, struct radeon_drm_cs *cs = radeon_drm_cs(rcs); switch (fid) { - case RADEON_FID_HYPERZ_RAM_ACCESS: + case RADEON_FID_R300_HYPERZ_ACCESS: if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) { return radeon_set_fd_access(cs, &cs->ws->hyperz_owner, &cs->ws->hyperz_owner_mutex, @@ -243,7 +243,7 @@ static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, return FALSE; } - case RADEON_FID_CMASK_RAM_ACCESS: + case RADEON_FID_R300_CMASK_ACCESS: if (debug_get_bool_option("RADEON_CMASK", FALSE)) { return radeon_set_fd_access(cs, &cs->ws->cmask_owner, &cs->ws->cmask_owner_mutex, diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 915a9c5bad1..4fcda4f1b11 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -83,8 +83,8 @@ struct radeon_info { }; enum radeon_feature_id { - RADEON_FID_HYPERZ_RAM_ACCESS, /* ZMask + HiZ */ - RADEON_FID_CMASK_RAM_ACCESS, + RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */ + RADEON_FID_R300_CMASK_ACCESS, }; struct radeon_winsys { From 
67c995e0f1b50ff08784e97482ca3e9e0bfd42ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Jul 2011 19:20:25 +0200 Subject: [PATCH 083/600] winsys/radeon: little change in radeon_bo_is_referenced_by_cs --- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 339beedc6ab..b8b170adcbe 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -88,8 +88,9 @@ static INLINE boolean radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs, struct radeon_bo *bo) { - return bo->num_cs_references == bo->rws->num_cs || - (bo->num_cs_references && radeon_get_reloc(cs->csc, bo) != -1); + int num_refs = bo->num_cs_references; + return num_refs == bo->rws->num_cs || + (num_refs && radeon_get_reloc(cs->csc, bo) != -1); } static INLINE boolean From e22a1005c0913b404ae82650cdc4f58bcbd5445b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Jul 2011 19:22:50 +0200 Subject: [PATCH 084/600] winsys/radeon: fix int->boolean conversion in radeon_bo_is_referenced_by_any_cs --- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index b8b170adcbe..ea2a820b30a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -112,7 +112,7 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs, static INLINE boolean radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) { - return bo->num_cs_references; + return bo->num_cs_references != 0; } void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs); From 7db148d3a5a350f80df8dc588e0079fda7aa378a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 23 Jul 2011 04:11:31 +0200 Subject: [PATCH 085/600] winsys/radeon: remove usage parameter from buffer_create --- src/gallium/drivers/r300/r300_flush.c | 1 - src/gallium/drivers/r300/r300_query.c | 3 +-- src/gallium/drivers/r300/r300_screen_buffer.c | 3 +-- src/gallium/drivers/r300/r300_texture.c | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 6 ++---- src/gallium/winsys/radeon/drm/radeon_winsys.h | 2 -- 6 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 4c6beea5a55..dc596c4122a 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -76,7 +76,6 @@ void r300_flush(struct pipe_context *pipe, /* Create a fence, which is a dummy BO. */ *rfence = r300->rws->buffer_create(r300->rws, 1, 1, PIPE_BIND_VERTEX_BUFFER, - PIPE_USAGE_STATIC, RADEON_DOMAIN_GTT); /* Add the fence as a dummy relocation. */ r300->rws->cs_add_reloc(r300->cs, diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 000114129bf..c0357f9d035 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -57,8 +57,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, /* Open up the occlusion query buffer. 
*/ q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, - PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, - q->domain); + PIPE_BIND_CUSTOM, q->domain); q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf); return (struct pipe_query*)q; diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 4154c81512e..c751a943b96 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -201,8 +201,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.b.width0, alignment, - rbuf->b.b.b.bind, rbuf->b.b.b.usage, - rbuf->domain); + rbuf->b.b.b.bind, rbuf->domain); if (!rbuf->buf) { util_slab_free(&r300screen->pool_buffers, rbuf); return NULL; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 62c2f1fff6c..08fccbe51c5 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -926,7 +926,7 @@ r300_texture_create_object(struct r300_screen *rscreen, if (!buffer) { tex->buf_size = tex->tex.size_in_bytes; tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, - base->bind, base->usage, tex->domain); + base->bind, tex->domain); if (!tex->buf) { FREE(tex); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index b6f12727e81..7f02a5abda5 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -468,8 +468,7 @@ static struct radeon_winsys_cs_handle *radeon_drm_get_cs_handle( get_radeon_bo(pb_buffer(_buf)); } -static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage, - enum radeon_bo_domain domain) +static unsigned get_pb_usage_from_create_flags(enum radeon_bo_domain domain) { unsigned res = 0; @@ -487,7 +486,6 @@ radeon_winsys_bo_create(struct radeon_winsys 
*rws, unsigned size, unsigned alignment, unsigned bind, - unsigned usage, enum radeon_bo_domain domain) { struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); @@ -497,7 +495,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, memset(&desc, 0, sizeof(desc)); desc.alignment = alignment; - desc.usage = get_pb_usage_from_create_flags(bind, usage, domain); + desc.usage = get_pb_usage_from_create_flags(domain); /* Assign a buffer manager. */ if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 4fcda4f1b11..2a0f025ebc4 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -119,7 +119,6 @@ struct radeon_winsys { * \param size The size to allocate. * \param alignment An alignment of the buffer in memory. * \param bind A bitmask of the PIPE_BIND_* flags. - * \param usage A bitmask of the PIPE_USAGE_* flags. * \param domain A bitmask of the RADEON_DOMAIN_* flags. * \return The created buffer object. 
*/ @@ -127,7 +126,6 @@ struct radeon_winsys { unsigned size, unsigned alignment, unsigned bind, - unsigned usage, enum radeon_bo_domain domain); struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)( From 533e2289235c61eff9a14bb24da7c8a1ff0b0afa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Jul 2011 22:14:39 +0200 Subject: [PATCH 086/600] winsys/radeon: manage constant buffers by the cache bufmgr too --- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 7f02a5abda5..796262ccfdb 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -498,7 +498,8 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, desc.usage = get_pb_usage_from_create_flags(domain); /* Assign a buffer manager. */ - if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER)) provider = ws->cman; else provider = ws->kman; From f170555a18a742ed8ecb9e04cd02a5cb414c27ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 24 Jul 2011 23:59:44 +0200 Subject: [PATCH 087/600] winsys/radeon: fix typos in the driver interface --- src/gallium/winsys/radeon/drm/radeon_winsys.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 2a0f025ebc4..f8a4d3abd43 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -297,7 +297,7 @@ struct radeon_winsys { */ void (*cs_set_flush)(struct radeon_winsys_cs *cs, void (*flush)(void *ctx, unsigned flags), - void *user); + void *ctx); /** * Return TRUE if a buffer is referenced by a command stream. 
@@ -312,7 +312,8 @@ struct radeon_winsys { * Request access to a feature for a command stream. * * \param cs A command stream. - * \param fid A winsys buffer. + * \param fid Feature ID, one of RADEON_FID_* + * \param enable Whether to enable or disable the feature. */ boolean (*cs_request_feature)(struct radeon_winsys_cs *cs, enum radeon_feature_id fid, From 84f8548dfcc7de55e162359e2e39af2614903cbe Mon Sep 17 00:00:00 2001 From: Tobias Droste Date: Mon, 18 Jul 2011 07:14:06 +0200 Subject: [PATCH 088/600] r300/compiler: simplify code in peephole_add_presub_add MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tobias Droste Signed-off-by: Marek Olšák --- .../dri/r300/compiler/radeon_optimize.c | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index b24274259f4..39dcb21d4f4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -561,28 +561,29 @@ static int peephole_add_presub_add( struct rc_instruction * inst_add) { unsigned dstmask = inst_add->U.I.DstReg.WriteMask; - struct rc_src_register * src1 = NULL; - unsigned int i; - - if (!is_presub_candidate(c, inst_add)) - return 0; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) return 0; - /* XXX This isn't fully implemented, is it? */ - /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. 
*/ - for (i = 0; i < 2; i++) { - if (inst_add->U.I.SrcReg[i].Abs) - return 0; + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; - /* XXX This looks weird, but it's basically what was here before this commit (see git blame): */ - if ((inst_add->U.I.SrcReg[i].Negate & dstmask) != dstmask && !src1) { - src1 = &inst_add->U.I.SrcReg[i]; - } - } + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; - if (!src1) + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; + + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) return 0; if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { @@ -615,7 +616,7 @@ static void presub_replace_inv( * of the add instruction must have the constatnt 1 swizzle. This function * does not check const registers to see if their value is 1.0, so it should * be called after the constant_folding optimization. - * @return + * @return * 0 if the ADD instruction is still part of the program. * 1 if the ADD instruction is no longer part of the program. */ From 99fba503b112a69a2fc14b7dd40684d9a6a1972a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 23 Jul 2011 15:57:51 +0200 Subject: [PATCH 089/600] configure.ac: do not check for llvm-config if llvm is disabled NOTE: This is a candidate for the 7.11 branch. 
--- configure.ac | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 0ea264ef042..5c832e64669 100644 --- a/configure.ac +++ b/configure.ac @@ -1699,9 +1699,6 @@ dnl Gallium configuration dnl if test "x$with_gallium_drivers" != x; then SRC_DIRS="$SRC_DIRS gallium gallium/winsys gallium/targets" - AC_PATH_PROG([LLVM_CONFIG], [llvm-config], [no]) -else - LLVM_CONFIG=no fi AC_SUBST([LLVM_CFLAGS]) @@ -1821,6 +1818,8 @@ if test "x$enable_gallium_llvm" = xauto; then esac fi if test "x$enable_gallium_llvm" = xyes; then + AC_PATH_PROG([LLVM_CONFIG], [llvm-config], [no]) + if test "x$LLVM_CONFIG" != xno; then LLVM_VERSION=`$LLVM_CONFIG --version` LLVM_CFLAGS=`$LLVM_CONFIG --cppflags|sed 's/-DNDEBUG\>//g'` From c1e591eed41b45c0fcf1dcac8b1b8aaeb6237a38 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 25 Jul 2011 18:33:40 -0700 Subject: [PATCH 090/600] glsl: Correctly return progress from lower_variable_index_to_cond_assign lower_variable_index_to_cond_assign runs until it can't make any more progress. It then returns the result of the last pass which will always be false. This caused the lowering loop in _mesa_ir_link_shader to end before doing one last round of lower_if_to_cond_assign. This caused several if-statements (resulting from lower_variable_index_to_cond_assign) to be left in the IR. In addition to this change, lower_variable_index_to_cond_assign should take a flag indicating whether or not it should even generate if-statements. This is easily controlled by switch_generator::linear_sequence_max_length. This would generate much better code on architectures without any flow control. Fixes i915 piglit regressions glsl-texcoord-array and glsl-fs-vec4-indexing-temp-src.
Reviewed-by: Eric Anholt --- src/glsl/lower_variable_index_to_cond_assign.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index 7792e6e624f..f8e4a1de428 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -525,10 +525,12 @@ lower_variable_index_to_cond_assign(exec_list *instructions, * matrix columns of an array of matrix), each pass will only lower one * level of indirection. */ + bool progress_ever = false; do { v.progress = false; visit_list_elements(&v, instructions); + progress_ever = v.progress || progress_ever; } while (v.progress); - return v.progress; + return progress_ever; } From 3daa2d97eb13f41de4cbab9301a167be85d48642 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 15:39:03 -0700 Subject: [PATCH 091/600] i965/fs: Fix MRT drawing since the m0->m2 move for shader debug. Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 1d89b8f1d11..eecfc92eb5b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -59,7 +59,8 @@ fs_visitor::generate_fb_write(fs_inst *inst) if (inst->target > 0) { /* Set the render target index for choosing BLEND_STATE. 
*/ - brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + inst->base_mrf, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(inst->target)); } From 09916e877fc14723d7950f892e181df9f7d7f36f Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 22 Jul 2011 15:25:55 -0700 Subject: [PATCH 092/600] mesa: Add utility function to get base format from a GL compressed format Reviewed-by: Brian Paul --- src/mesa/main/texcompress.c | 88 +++++++++++++++++++++++++++++++++++++ src/mesa/main/texcompress.h | 3 ++ 2 files changed, 91 insertions(+) diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index d820ae92747..040be943e82 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -39,6 +39,94 @@ #include "texcompress.h" +/** + * Get the GL base format of a specified GL compressed texture format + * + * From page 232 of the OpenGL 3.3 (Compatiblity Profile) spec: + * + * "Compressed Internal Format Base Internal Format Type + * --------------------------- -------------------- --------- + * COMPRESSED_ALPHA ALPHA Generic + * COMPRESSED_LUMINANCE LUMINANCE Generic + * COMPRESSED_LUMINANCE_ALPHA LUMINANCE_ALPHA Generic + * COMPRESSED_INTENSITY INTENSITY Generic + * COMPRESSED_RED RED Generic + * COMPRESSED_RG RG Generic + * COMPRESSED_RGB RGB Generic + * COMPRESSED_RGBA RGBA Generic + * COMPRESSED_SRGB RGB Generic + * COMPRESSED_SRGB_ALPHA RGBA Generic + * COMPRESSED_SLUMINANCE LUMINANCE Generic + * COMPRESSED_SLUMINANCE_ALPHA LUMINANCE_ALPHA Generic + * COMPRESSED_RED_RGTC1 RED Specific + * COMPRESSED_SIGNED_RED_RGTC1 RED Specific + * COMPRESSED_RG_RGTC2 RG Specific + * COMPRESSED_SIGNED_RG_RGTC2 RG Specific" + * + * \return + * The base format of \c format if \c format is a compressed format (either + * generic or specific. Otherwise 0 is returned. 
+ */ +GLenum +_mesa_gl_compressed_format_base_format(GLenum format) +{ + switch (format) { + case GL_COMPRESSED_RED: + case GL_COMPRESSED_RED_RGTC1: + case GL_COMPRESSED_SIGNED_RED_RGTC1: + return GL_RED; + + case GL_COMPRESSED_RG: + case GL_COMPRESSED_RG_RGTC2: + case GL_COMPRESSED_SIGNED_RG_RGTC2: + return GL_RG; + + case GL_COMPRESSED_RGB: + case GL_COMPRESSED_SRGB: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + return GL_RGB; + + case GL_COMPRESSED_RGBA: + case GL_COMPRESSED_SRGB_ALPHA: + case GL_COMPRESSED_RGBA_BPTC_UNORM_ARB: + case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB: + case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB: + case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + return GL_RGBA; + + case GL_COMPRESSED_ALPHA: + return GL_ALPHA; + + case GL_COMPRESSED_LUMINANCE: + case GL_COMPRESSED_SLUMINANCE: + case GL_COMPRESSED_LUMINANCE_LATC1_EXT: + case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: + return GL_LUMINANCE; + + case GL_COMPRESSED_LUMINANCE_ALPHA: + case GL_COMPRESSED_SLUMINANCE_ALPHA: + case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: + case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: + case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI: + return GL_LUMINANCE_ALPHA; + + case GL_COMPRESSED_INTENSITY: + return GL_INTENSITY; + + default: + return 0; + } +} + /** * Return list of (and count of) all specific texture compression * formats that are supported. 
diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h index 19b08bbadf6..922da00912d 100644 --- a/src/mesa/main/texcompress.h +++ b/src/mesa/main/texcompress.h @@ -33,6 +33,9 @@ struct gl_context; #if _HAVE_FULL_GL +extern GLenum +_mesa_gl_compressed_format_base_format(GLenum format); + extern GLuint _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all); From 143b65f7612c255f29d08392192098b1c2bf4b62 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 22 Jul 2011 15:26:24 -0700 Subject: [PATCH 093/600] mesa: Return the correct internal fmt when a generic compressed fmt was used If an application requests a generic compressed format for a texture and the driver does not pick a specific compressed format, return the generic base format (e.g., GL_RGBA) for the GL_TEXTURE_INTERNAL_FORMAT query. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=3165 Reviewed-by: Brian Paul --- src/mesa/main/texparam.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 4b9dcb5d3b5..c4ec29533e2 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -915,9 +915,23 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, *params = _mesa_compressed_format_to_glenum(ctx, texFormat); } else { - /* return the user's requested internal format */ - *params = img->InternalFormat; - } + /* If the true internal format is not compressed but the user + * requested a generic compressed format, we have to return the + * generic base format that matches. + * + * From page 119 (page 129 of the PDF) of the OpenGL 1.3 spec: + * + * "If no specific compressed format is available, + * internalformat is instead replaced by the corresponding base + * internal format." + * + * Otherwise just return the user's requested internal format + */ + const GLenum f = + _mesa_gl_compressed_format_base_format(img->InternalFormat); + + *params = (f != 0) ? 
f : img->InternalFormat; + } break; case GL_TEXTURE_BORDER: *params = img->Border; From b189d1635d89cd7d900e8f9a5eed88d7dc0b46cb Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 22 Jul 2011 16:45:50 -0700 Subject: [PATCH 094/600] mesa: Make _mesa_get_compressed_formats match the texture compression specs The implementation deviated slightly from the GL_EXT_texture_sRGB spec and from other implementations. A giant comment block was added to justify the somewhat odd behavior of this function. In addition, the interface had unnecessary cruft. The 'all' parameter was false at all callers, so it has been removed. Reviewed-by: Brian Paul --- src/mesa/main/get.c | 4 +- src/mesa/main/texcompress.c | 117 +++++++++++++++++++++++++++--------- src/mesa/main/texcompress.h | 2 +- 3 files changed, 93 insertions(+), 30 deletions(-) diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 0492e1585c3..d32c68a53a4 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -1569,11 +1569,11 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu break; case GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB: - v->value_int = _mesa_get_compressed_formats(ctx, NULL, GL_FALSE); + v->value_int = _mesa_get_compressed_formats(ctx, NULL); break; case GL_COMPRESSED_TEXTURE_FORMATS_ARB: v->value_int_n.n = - _mesa_get_compressed_formats(ctx, v->value_int_n.ints, GL_FALSE); + _mesa_get_compressed_formats(ctx, v->value_int_n.ints); ASSERT(v->value_int_n.n <= 100); break; diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index 040be943e82..42bd1eee5ca 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -131,16 +131,101 @@ _mesa_gl_compressed_format_base_format(GLenum format) * Return list of (and count of) all specific texture compression * formats that are supported. * + * Some formats are \b not returned by this function. 
The + * \c GL_COMPRESSED_TEXTURE_FORMATS query only returns formats that are + * "suitable for general-purpose usage." All texture compression extensions + * have taken this to mean either linear RGB or linear RGBA. + * + * The GL_ARB_texture_compress_rgtc spec says: + * + * "19) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS queries return the RGTC formats? + * + * RESOLVED: No. + * + * The OpenGL 2.1 specification says "The only values returned + * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those + * corresponding to formats suitable for general-purpose usage. + * The renderer will not enumerate formats with restrictions that + * need to be specifically understood prior to use." + * + * Compressed textures with just red or red-green components are + * not general-purpose so should not be returned by these queries + * because they have restrictions. + * + * Applications that seek to use the RGTC formats should do so + * by looking for this extension's name in the string returned by + * glGetString(GL_EXTENSIONS) rather than + * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS return." + * + * There is nearly identical wording in the GL_EXT_texture_compression_rgtc + * spec. + * + * The GL_EXT_texture_rRGB spec says: + * + * "22) Should the new COMPRESSED_SRGB_* formats be listed in an + * implementation's GL_COMPRESSED_TEXTURE_FORMATS list? + * + * RESOLVED: No. Section 3.8.1 says formats listed by + * GL_COMPRESSED_TEXTURE_FORMATS are "suitable for general-purpose + * usage." The non-linear distribution of red, green, and + * blue for these sRGB compressed formats makes them not really + * general-purpose." + * + * The GL_EXT_texture_compression_latc spec says: + * + * "16) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS queries return the LATC formats? + * + * RESOLVED: No. 
+ * + * The OpenGL 2.1 specification says "The only values returned + * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those + * corresponding to formats suitable for general-purpose usage. + * The renderer will not enumerate formats with restrictions that + * need to be specifically understood prior to use." + * + * Historically, OpenGL implementation have advertised the RGB and + * RGBA versions of the S3TC extensions compressed format tokens + * through this mechanism. + * + * The specification is not sufficiently clear about what "suitable + * for general-purpose usage" means. Historically that seems to mean + * unsigned RGB or unsigned RGBA. The DXT1 format supporting alpha + * (GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) is not exposed in the list (at + * least for NVIDIA drivers) because the alpha is always 1.0 expect + * when it is 0.0 when RGB is required to be black. NVIDIA's even + * limits itself to true linear RGB or RGBA formats, specifically + * not including EXT_texture_sRGB's sRGB S3TC compressed formats. + * + * Adding luminance and luminance-alpha texture formats (and + * certainly signed versions of luminance and luminance-alpha + * formats!) invites potential comptaibility problems with old + * applications using this mechanism since old applications are + * unlikely to expect non-RGB or non-RGBA formats to be advertised + * through this mechanism. However no specific misinteractions + * with old applications is known. + * + * Applications that seek to use the LATC formats should do so + * by looking for this extension's name in the string returned by + * glGetString(GL_EXTENSIONS) rather than + * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS return." + * + * There is no formal spec for GL_ATI_texture_compression_3dc. Since the + * formats added by this extension are luminance-alpha formats, it is + * reasonable to expect them to follow the same rules as + * GL_EXT_texture_compression_latc. 
At the very least, Catalyst 11.6 does not + * expose the 3dc formats through this mechanism. + * * \param ctx the GL context * \param formats the resulting format list (may be NULL). - * \param all if true return all formats, even those with some kind - * of restrictions/limitations (See GL_ARB_texture_compression - * spec for more info). * * \return number of formats. */ GLuint -_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all) +_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats) { GLuint n = 0; if (ctx->Extensions.TDFX_texture_compression_FXT1) { @@ -152,24 +237,15 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 2; } } - /* don't return RGTC - ARB_texture_compression_rgtc query 19 */ + if (ctx->Extensions.EXT_texture_compression_s3tc) { if (formats) { formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT; - /* This format has some restrictions/limitations and so should - * not be returned via the GL_COMPRESSED_TEXTURE_FORMATS query. - * Specifically, all transparent pixels become black. NVIDIA - * omits this format too. 
- */ - if (all) - formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; } else { n += 3; - if (all) - n += 1; } } if (ctx->Extensions.S3_s3tc) { @@ -183,19 +259,6 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 4; } } -#if FEATURE_EXT_texture_sRGB - if (ctx->Extensions.EXT_texture_sRGB) { - if (formats) { - formats[n++] = GL_COMPRESSED_SRGB_S3TC_DXT1_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT; - } - else { - n += 4; - } - } -#endif /* FEATURE_EXT_texture_sRGB */ return n; #if FEATURE_ES1 || FEATURE_ES2 diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h index 922da00912d..375cf90c8a2 100644 --- a/src/mesa/main/texcompress.h +++ b/src/mesa/main/texcompress.h @@ -37,7 +37,7 @@ extern GLenum _mesa_gl_compressed_format_base_format(GLenum format); extern GLuint -_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all); +_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats); extern gl_format _mesa_glenum_to_compressed_format(GLenum format); From 95739f19ccc8d3915c437238ca057ddbecd193c6 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 25 Jul 2011 13:30:17 -0500 Subject: [PATCH 095/600] st/mesa: respect force_s3tc_enable environment variable NOTE: This is a candidate for the 7.10 and 7.11 branches. 
--- src/mesa/state_tracker/st_extensions.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 99b231d9706..b5f6d356eb0 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -208,6 +208,15 @@ void st_init_limits(struct st_context *st) } +static GLboolean st_get_s3tc_override(void) +{ + const char *override = _mesa_getenv("force_s3tc_enable"); + if (override && !strcmp(override, "true")) + return GL_TRUE; + return GL_FALSE; +} + + /** * Use pipe_screen::get_param() to query PIPE_CAP_ values to determine * which GL extensions are supported. @@ -426,7 +435,7 @@ void st_init_extensions(struct st_context *st) if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW) && - ctx->Mesa_DXTn) { + (ctx->Mesa_DXTn || st_get_s3tc_override())) { ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; ctx->Extensions.S3_s3tc = GL_TRUE; } From 860c51d82711936d343b55aafb46befc8c032fe6 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 20 Jul 2011 17:35:22 -0500 Subject: [PATCH 096/600] util: enable S3TC support when the force_s3tc_enable env var is set to "true" NOTE: This is a candidate for the 7.10 and 7.11 branches. 
--- src/gallium/auxiliary/util/u_format_s3tc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c index bb989c29d81..d8a7c0d453f 100644 --- a/src/gallium/auxiliary/util/u_format_s3tc.c +++ b/src/gallium/auxiliary/util/u_format_s3tc.c @@ -119,8 +119,15 @@ util_format_s3tc_init(void) library = util_dl_open(DXTN_LIBNAME); if (!library) { - debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn " - "compression/decompression unavailable\n"); + if (getenv("force_s3tc_enable") && + !strcmp(getenv("force_s3tc_enable"), "true")) { + debug_printf("couldn't open " DXTN_LIBNAME ", enabling DXTn due to " + "force_s3tc_enable=true environment variable\n"); + util_format_s3tc_enabled = TRUE; + } else { + debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn " + "compression/decompression unavailable\n"); + } return; } From 1c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 Jul 2011 21:15:05 +0200 Subject: [PATCH 097/600] r300g: copy the compiler from r300c What a beast. r300g doesn't depend on files from r300c anymore, so r300c is now left to its own fate. BTW 'make test' can be invoked from the gallium/r300 directory to run some compiler unit tests. 
--- src/gallium/drivers/r300/Makefile | 54 +- src/gallium/drivers/r300/SConscript | 42 +- .../drivers/r300/compiler/memory_pool.c | 97 ++ .../drivers/r300/compiler/memory_pool.h | 80 ++ .../drivers/r300/compiler/r300_fragprog.c | 338 +++++ .../drivers/r300/compiler/r300_fragprog.h | 44 + .../r300/compiler/r300_fragprog_emit.c | 536 ++++++++ .../r300/compiler/r300_fragprog_swizzle.c | 243 ++++ .../r300/compiler/r300_fragprog_swizzle.h | 39 + .../drivers/r300/compiler/r3xx_fragprog.c | 172 +++ .../drivers/r300/compiler/r3xx_vertprog.c | 1045 +++++++++++++++ .../r300/compiler/r3xx_vertprog_dump.c | 207 +++ .../drivers/r300/compiler/r500_fragprog.c | 539 ++++++++ .../drivers/r300/compiler/r500_fragprog.h | 50 + .../r300/compiler/r500_fragprog_emit.c | 678 ++++++++++ .../drivers/r300/compiler/radeon_code.c | 187 +++ .../drivers/r300/compiler/radeon_code.h | 306 +++++ .../drivers/r300/compiler/radeon_compiler.c | 489 +++++++ .../drivers/r300/compiler/radeon_compiler.h | 171 +++ .../r300/compiler/radeon_compiler_util.c | 701 ++++++++++ .../r300/compiler/radeon_compiler_util.h | 89 ++ .../drivers/r300/compiler/radeon_dataflow.c | 892 +++++++++++++ .../drivers/r300/compiler/radeon_dataflow.h | 134 ++ .../r300/compiler/radeon_dataflow_deadcode.c | 359 +++++ .../r300/compiler/radeon_dataflow_swizzles.c | 103 ++ .../r300/compiler/radeon_emulate_branches.c | 342 +++++ .../r300/compiler/radeon_emulate_branches.h | 30 + .../r300/compiler/radeon_emulate_loops.c | 522 ++++++++ .../r300/compiler/radeon_emulate_loops.h | 32 + .../drivers/r300/compiler/radeon_list.c | 90 ++ .../drivers/r300/compiler/radeon_list.h | 46 + .../drivers/r300/compiler/radeon_opcodes.c | 546 ++++++++ .../drivers/r300/compiler/radeon_opcodes.h | 263 ++++ .../drivers/r300/compiler/radeon_optimize.c | 700 ++++++++++ .../r300/compiler/radeon_pair_dead_sources.c | 62 + .../r300/compiler/radeon_pair_regalloc.c | 706 ++++++++++ .../r300/compiler/radeon_pair_schedule.c | 1010 +++++++++++++++ 
.../r300/compiler/radeon_pair_translate.c | 359 +++++ .../drivers/r300/compiler/radeon_program.c | 225 ++++ .../drivers/r300/compiler/radeon_program.h | 206 +++ .../r300/compiler/radeon_program_alu.c | 1154 +++++++++++++++++ .../r300/compiler/radeon_program_alu.h | 66 + .../r300/compiler/radeon_program_constants.h | 190 +++ .../r300/compiler/radeon_program_pair.c | 239 ++++ .../r300/compiler/radeon_program_pair.h | 137 ++ .../r300/compiler/radeon_program_print.c | 418 ++++++ .../r300/compiler/radeon_program_tex.c | 528 ++++++++ .../r300/compiler/radeon_program_tex.h | 39 + .../r300/compiler/radeon_remove_constants.c | 150 +++ .../r300/compiler/radeon_remove_constants.h | 35 + .../r300/compiler/radeon_rename_regs.c | 92 ++ .../r300/compiler/radeon_rename_regs.h | 9 + .../drivers/r300/compiler/radeon_swizzle.h | 57 + .../drivers/r300/compiler/radeon_variable.c | 517 ++++++++ .../drivers/r300/compiler/radeon_variable.h | 89 ++ .../drivers/r300/compiler/tests/.gitignore | 1 + .../drivers/r300/compiler/tests/Makefile | 53 + .../tests/radeon_compiler_util_tests.c | 76 ++ .../r300/compiler/tests/rc_test_helpers.c | 380 ++++++ .../r300/compiler/tests/rc_test_helpers.h | 13 + .../drivers/r300/compiler/tests/unit_test.c | 35 + .../drivers/r300/compiler/tests/unit_test.h | 17 + src/gallium/drivers/r300/r300_emit.h | 1 - src/gallium/drivers/r300/r300_fs.c | 3 +- src/gallium/drivers/r300/r300_fs.h | 2 +- src/gallium/drivers/r300/r300_reg.h | 21 +- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 3 +- src/gallium/drivers/r300/r300_vs.c | 2 +- src/gallium/drivers/r300/r300_vs.h | 2 +- 69 files changed, 17038 insertions(+), 25 deletions(-) create mode 100644 src/gallium/drivers/r300/compiler/memory_pool.c create mode 100644 src/gallium/drivers/r300/compiler/memory_pool.h create mode 100644 src/gallium/drivers/r300/compiler/r300_fragprog.c create mode 100644 src/gallium/drivers/r300/compiler/r300_fragprog.h create mode 100644 src/gallium/drivers/r300/compiler/r300_fragprog_emit.c 
create mode 100644 src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c create mode 100644 src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h create mode 100644 src/gallium/drivers/r300/compiler/r3xx_fragprog.c create mode 100644 src/gallium/drivers/r300/compiler/r3xx_vertprog.c create mode 100644 src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c create mode 100644 src/gallium/drivers/r300/compiler/r500_fragprog.c create mode 100644 src/gallium/drivers/r300/compiler/r500_fragprog.h create mode 100644 src/gallium/drivers/r300/compiler/r500_fragprog_emit.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_code.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_code.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_compiler.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_compiler.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_compiler_util.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_compiler_util.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_dataflow.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_dataflow.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_emulate_branches.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_emulate_branches.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_emulate_loops.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_emulate_loops.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_list.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_list.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_opcodes.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_opcodes.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_optimize.c create mode 100644 
src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_pair_schedule.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_pair_translate.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_program.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_program.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_alu.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_alu.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_constants.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_pair.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_pair.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_print.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_tex.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_tex.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_remove_constants.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_remove_constants.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_rename_regs.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_rename_regs.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_swizzle.h create mode 100644 src/gallium/drivers/r300/compiler/radeon_variable.c create mode 100644 src/gallium/drivers/r300/compiler/radeon_variable.h create mode 100644 src/gallium/drivers/r300/compiler/tests/.gitignore create mode 100644 src/gallium/drivers/r300/compiler/tests/Makefile create mode 100644 src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c create mode 100644 src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c create mode 100644 src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h create mode 100644 
src/gallium/drivers/r300/compiler/tests/unit_test.c create mode 100644 src/gallium/drivers/r300/compiler/tests/unit_test.h diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 4088216adcb..4f021276a8f 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -26,19 +26,51 @@ C_SOURCES = \ r300_texture.c \ r300_texture_desc.c \ r300_tgsi_to_rc.c \ - r300_transfer.c + r300_transfer.c \ + \ + compiler/radeon_code.c \ + compiler/radeon_compiler.c \ + compiler/radeon_compiler_util.c \ + compiler/radeon_emulate_branches.c \ + compiler/radeon_emulate_loops.c \ + compiler/radeon_program.c \ + compiler/radeon_program_print.c \ + compiler/radeon_opcodes.c \ + compiler/radeon_program_alu.c \ + compiler/radeon_program_pair.c \ + compiler/radeon_program_tex.c \ + compiler/radeon_pair_translate.c \ + compiler/radeon_pair_schedule.c \ + compiler/radeon_pair_regalloc.c \ + compiler/radeon_pair_dead_sources.c \ + compiler/radeon_dataflow.c \ + compiler/radeon_dataflow_deadcode.c \ + compiler/radeon_dataflow_swizzles.c \ + compiler/radeon_list.c \ + compiler/radeon_optimize.c \ + compiler/radeon_remove_constants.c \ + compiler/radeon_rename_regs.c \ + compiler/radeon_variable.c \ + compiler/r3xx_fragprog.c \ + compiler/r300_fragprog.c \ + compiler/r300_fragprog_swizzle.c \ + compiler/r300_fragprog_emit.c \ + compiler/r500_fragprog.c \ + compiler/r500_fragprog_emit.c \ + compiler/r3xx_vertprog.c \ + compiler/r3xx_vertprog_dump.c \ + compiler/memory_pool.c \ + \ + $(TOP)/src/glsl/ralloc.c \ + $(TOP)/src/mesa/program/register_allocate.c + LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ - -I$(TOP)/include - -COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a - -EXTRA_OBJECTS = \ - $(COMPILER_ARCHIVE) + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/glsl include ../../Makefile.template -.PHONY: $(COMPILER_ARCHIVE) -$(COMPILER_ARCHIVE): - $(MAKE) -C 
$(TOP)/src/mesa/drivers/dri/r300/compiler +test: default + @$(MAKE) -s -C compiler/tests/ diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 3af157a7956..7ffd1c27c96 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -1,13 +1,11 @@ Import('*') -r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') - env = env.Clone() -# add the paths for r300compiler env.Append(CPPPATH = [ - '#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa', + '#/src/glsl', + '#/src/mapi', ]) r300 = env.ConvenienceLibrary( @@ -36,7 +34,41 @@ r300 = env.ConvenienceLibrary( 'r300_texture_desc.c', 'r300_tgsi_to_rc.c', 'r300_transfer.c', - ] + r300compiler) + r300compiler + 'compiler/radeon_code.c', + 'compiler/radeon_compiler.c', + 'compiler/radeon_compiler_util.c', + 'compiler/radeon_program.c', + 'compiler/radeon_program_print.c', + 'compiler/radeon_opcodes.c', + 'compiler/radeon_program_alu.c', + 'compiler/radeon_program_pair.c', + 'compiler/radeon_program_tex.c', + 'compiler/radeon_pair_translate.c', + 'compiler/radeon_pair_schedule.c', + 'compiler/radeon_pair_regalloc.c', + 'compiler/radeon_pair_dead_sources.c', + 'compiler/radeon_optimize.c', + 'compiler/radeon_remove_constants.c', + 'compiler/radeon_rename_regs.c', + 'compiler/radeon_emulate_branches.c', + 'compiler/radeon_emulate_loops.c', + 'compiler/radeon_dataflow.c', + 'compiler/radeon_dataflow_deadcode.c', + 'compiler/radeon_dataflow_swizzles.c', + 'compiler/radeon_variable.c', + 'compiler/radeon_list.c', + 'compiler/r3xx_fragprog.c', + 'compiler/r300_fragprog.c', + 'compiler/r300_fragprog_swizzle.c', + 'compiler/r300_fragprog_emit.c', + 'compiler/r500_fragprog.c', + 'compiler/r500_fragprog_emit.c', + 'compiler/r3xx_vertprog.c', + 'compiler/r3xx_vertprog_dump.c', + 'compiler/memory_pool.c', + '#/src/glsl/ralloc.c', + '#/src/mesa/program/register_allocate.c' + ]) env.Alias('r300', r300) diff --git 
a/src/gallium/drivers/r300/compiler/memory_pool.c b/src/gallium/drivers/r300/compiler/memory_pool.c new file mode 100644 index 00000000000..ddcdddf9e3c --- /dev/null +++ b/src/gallium/drivers/r300/compiler/memory_pool.c @@ -0,0 +1,97 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "memory_pool.h" + +#include +#include +#include + + +#define POOL_LARGE_ALLOC 4096 +#define POOL_ALIGN 8 + + +struct memory_block { + struct memory_block * next; +}; + +void memory_pool_init(struct memory_pool * pool) +{ + memset(pool, 0, sizeof(struct memory_pool)); +} + + +void memory_pool_destroy(struct memory_pool * pool) +{ + while(pool->blocks) { + struct memory_block * block = pool->blocks; + pool->blocks = block->next; + free(block); + } +} + +static void refill_pool(struct memory_pool * pool) +{ + unsigned int blocksize = pool->total_allocated; + struct memory_block * newblock; + + if (!blocksize) + blocksize = 2*POOL_LARGE_ALLOC; + + newblock = (struct memory_block*)malloc(blocksize); + newblock->next = pool->blocks; + pool->blocks = newblock; + + pool->head = (unsigned char*)(newblock + 1); + pool->end = ((unsigned char*)newblock) + blocksize; + pool->total_allocated += blocksize; +} + + +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) +{ + if (bytes < POOL_LARGE_ALLOC) { + void * ptr; + + if (pool->head + bytes > pool->end) + refill_pool(pool); + + assert(pool->head + bytes <= pool->end); + + ptr = pool->head; + + pool->head += bytes; + pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); + + return ptr; + } else { + struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block)); + + block->next = pool->blocks; + pool->blocks = block; + + return (block + 1); + } +} + + diff --git a/src/gallium/drivers/r300/compiler/memory_pool.h b/src/gallium/drivers/r300/compiler/memory_pool.h new file mode 100644 index 00000000000..42344d0e3ba --- /dev/null +++ b/src/gallium/drivers/r300/compiler/memory_pool.h @@ -0,0 +1,80 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software 
#ifndef MEMORY_POOL_H
#define MEMORY_POOL_H

struct memory_block;

/**
 * Provides a pool of memory that can quickly be allocated from, at the
 * cost of being unable to explicitly free one of the allocated blocks.
 * Instead, the entire pool can be freed at once.
 *
 * The idea is to allow one to quickly allocate a flexible amount of
 * memory during operations like shader compilation while avoiding
 * reference counting headaches.
 */
struct memory_pool {
	unsigned char * head;
	unsigned char * end;
	unsigned int total_allocated;
	struct memory_block * blocks;
};


void memory_pool_init(struct memory_pool * pool);
void memory_pool_destroy(struct memory_pool * pool);
void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);


/**
 * Generic helper for growing an array that has separate size/count
 * and reserved counters to accommodate up to num new elements.
 *
 *  type * Array;
 *  unsigned int Size;
 *  unsigned int Reserved;
 *
 *  memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
 *  assert(Size + k <= Reserved);
 *
 * When the array has to grow, the capacity is doubled; if doubling is still
 * too small for Size + num elements, room for Size + 4 * num elements is
 * reserved instead.  The old storage is not released (it belongs to the
 * pool) and the first Size elements are copied to the new storage.
 *
 * \note Size is not changed by this macro.
 *
 * \note The expansion calls memcpy, so the including file needs <string.h>.
 *
 * \warning Array, Size, Reserved have to be lvalues and may be evaluated
 * several times.
 */
#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
	unsigned int _num = (num); \
	if ((size) + _num > (reserved)) { \
		unsigned int newreserve = (reserved) * 2; \
		type * newarray; \
		if (newreserve < (size) + _num) \
			newreserve = (size) + 4 * _num; /* arbitrary heuristic */ \
		newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
		if ((size) > 0) \
			memcpy(newarray, (array), (size) * sizeof(type)); \
		(array) = newarray; \
		(reserved) = newreserve; \
	} \
} while(0)

#endif /* MEMORY_POOL_H */
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r300_fragprog.h" + +#include + +#include "../r300_reg.h" + +static void presub_string(char out[10], unsigned int inst) +{ + switch(inst & 0x600000){ + case R300_ALU_SRCP_1_MINUS_2_SRC0: + sprintf(out, "bias"); + break; + case R300_ALU_SRCP_SRC1_MINUS_SRC0: + sprintf(out, "sub"); + break; + case R300_ALU_SRCP_SRC1_PLUS_SRC0: + sprintf(out, "add"); + break; + case R300_ALU_SRCP_1_MINUS_SRC0: + sprintf(out, "inv "); + break; + } +} + +static int get_msb(unsigned int bit, unsigned int r400_ext_addr) +{ + return (r400_ext_addr & bit) ? 1 << 5 : 0; +} + +/* just some random things... 
/**
 * Print a human-readable dump of the r300 hardware fragment program in
 * \p c to stderr.
 *
 * For every node (0 .. config & 3) the TEX instructions and the paired
 * RGB/alpha ALU instructions are decoded from the raw instruction words.
 *
 * \param c     compiler; it is cast to struct r300_fragment_program_compiler
 * \param user  unused
 *
 * \note A function-local static counter numbers the dumps.
 */
void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
{
	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
	struct r300_fragment_program_code *code = &compiler->code->code.r300;
	int n, i, j;
	static int pc = 0;

	fprintf(stderr, "pc=%d*************************************\n", pc++);

	fprintf(stderr, "Hardware program\n");
	fprintf(stderr, "----------------\n");
	if (c->is_r400) {
		fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
	}

	for (n = 0; n <= (code->config & 3); n++) {
		/* The words of the active nodes occupy the last
		 * (config & 3) + 1 entries of code_addr[]. */
		uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
		/* Low bits come from code_addr; three more bits per node come
		 * from r400_code_offset_ext and are placed above bit 5. */
		unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
				(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
		unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
				(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
		int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
		int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;

		fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
			"alu_end: %u, tex_end: %d (code_addr: %08x)\n", n,
			alu_offset, tex_offset, alu_end, tex_end, code_addr);

		/* Node 0 only has TEX instructions when the config flag says so. */
		if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
			fprintf(stderr, " TEX:\n");
			/* tex_end is inclusive: it is the index of the last
			 * instruction relative to tex_offset. */
			for (i = tex_offset;
			     i <= tex_offset + tex_end;
			     ++i) {
				const char *instr;

				switch ((code->tex.inst[i] >> R300_TEX_INST_SHIFT) &
					15) {
				case R300_TEX_OP_LD:
					instr = "TEX";
					break;
				case R300_TEX_OP_KIL:
					instr = "KIL";
					break;
				case R300_TEX_OP_TXP:
					instr = "TXP";
					break;
				case R300_TEX_OP_TXB:
					instr = "TXB";
					break;
				default:
					instr = "UNKNOWN";
				}

				fprintf(stderr,
					" %s t%i, %c%i, texture[%i] (%08x)\n",
					instr,
					(code->tex.inst[i] >> R300_DST_ADDR_SHIFT) & 31,
					't',
					(code->tex.inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
					(code->tex.inst[i] & R300_TEX_ID_MASK) >>
					R300_TEX_ID_SHIFT,
					code->tex.inst[i]);
			}
		}

		/* alu_end is inclusive as well. */
		for (i = alu_offset;
		     i <= alu_offset + alu_end; ++i) {
			/* srcc/srca: names of the three sources plus, in slot 3,
			 * the presubtract mnemonic; dstc/dsta: destinations. */
			char srcc[4][10], dstc[20];
			char srca[4][10], dsta[20];
			char argc[3][20];
			char arga[3][20];
			char flags[5], tmp[10];

			/* Source registers: 6 bits per source, bit 5 selects
			 * constant ('c') vs. temporary ('t'). */
			for (j = 0; j < 3; ++j) {
				int regc = code->alu.inst[i].rgb_addr >> (j * 6);
				int rega = code->alu.inst[i].alpha_addr >> (j * 6);
				int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
					code->alu.inst[i].r400_ext_addr);
				int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
					code->alu.inst[i].r400_ext_addr);

				sprintf(srcc[j], "%c%i",
					(regc & 32) ? 'c' : 't', (regc & 31) | msbc);
				sprintf(srca[j], "%c%i",
					(rega & 32) ? 'c' : 't', (rega & 31) | msba);
			}

			/* RGB destination: temporary write mask first ... */
			dstc[0] = 0;
			sprintf(flags, "%s%s%s",
				(code->alu.inst[i].rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
				(code->alu.inst[i].rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
				(code->alu.inst[i].rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
			if (flags[0] != 0) {
				unsigned int msb = get_msb(
					R400_ADDRD_EXT_RGB_MSB_BIT,
					code->alu.inst[i].r400_ext_addr);

				sprintf(dstc, "t%i.%s ",
					((code->alu.inst[i].rgb_addr >> R300_ALU_DSTC_SHIFT)
					 & 31) | msb,
					flags);
			}
			/* ... then the output write mask, appended to dstc. */
			sprintf(flags, "%s%s%s",
				(code->alu.inst[i].rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
				(code->alu.inst[i].rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
				(code->alu.inst[i].rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
			if (flags[0] != 0) {
				sprintf(tmp, "o%i.%s",
					(code->alu.inst[i].rgb_addr >> 29) & 3,
					flags);
				strcat(dstc, tmp);
			}
			/* Presub */
			presub_string(srcc[3], code->alu.inst[i].rgb_inst);
			presub_string(srca[3], code->alu.inst[i].alpha_inst);

			/* Alpha destination: temporary, output and depth. */
			dsta[0] = 0;
			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
				unsigned int msb = get_msb(
					R400_ADDRD_EXT_A_MSB_BIT,
					code->alu.inst[i].r400_ext_addr);
				sprintf(dsta, "t%i.w ",
					((code->alu.inst[i].alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
					| msb);
			}
			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
				sprintf(tmp, "o%i.w ",
					(code->alu.inst[i].alpha_addr >> 25) & 3);
				strcat(dsta, tmp);
			}
			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
				strcat(dsta, "Z");
			}

			fprintf(stderr,
				"%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
				" w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
				srcc[0], srcc[1], srcc[2], srcc[3], dstc,
				code->alu.inst[i].rgb_addr, srca[0], srca[1],
				srca[2], srca[3], dsta,
				code->alu.inst[i].alpha_addr);

			/* Arguments: 7 bits per argument; the low 5 bits select
			 * source and swizzle, bit 5 is negate, bit 6 is abs. */
			for (j = 0; j < 3; ++j) {
				int regc = code->alu.inst[i].rgb_inst >> (j * 7);
				int rega = code->alu.inst[i].alpha_inst >> (j * 7);
				int d;
				char buf[20];

				d = regc & 31;
				if (d < 12) {
					/* d / 4 is the source, d % 4 the swizzle. */
					switch (d % 4) {
					case R300_ALU_ARGC_SRC0C_XYZ:
						sprintf(buf, "%s.xyz",
							srcc[d / 4]);
						break;
					case R300_ALU_ARGC_SRC0C_XXX:
						sprintf(buf, "%s.xxx",
							srcc[d / 4]);
						break;
					case R300_ALU_ARGC_SRC0C_YYY:
						sprintf(buf, "%s.yyy",
							srcc[d / 4]);
						break;
					case R300_ALU_ARGC_SRC0C_ZZZ:
						sprintf(buf, "%s.zzz",
							srcc[d / 4]);
						break;
					}
				} else if (d < 15) {
					sprintf(buf, "%s.www", srca[d - 12]);
				} else if (d < 20 ) {
					switch(d) {
					case R300_ALU_ARGC_SRCP_XYZ:
						sprintf(buf, "srcp.xyz");
						break;
					case R300_ALU_ARGC_SRCP_XXX:
						sprintf(buf, "srcp.xxx");
						break;
					case R300_ALU_ARGC_SRCP_YYY:
						sprintf(buf, "srcp.yyy");
						break;
					case R300_ALU_ARGC_SRCP_ZZZ:
						sprintf(buf, "srcp.zzz");
						break;
					case R300_ALU_ARGC_SRCP_WWW:
						sprintf(buf, "srcp.www");
						break;
					}
				} else if (d == 20) {
					sprintf(buf, "0.0");
				} else if (d == 21) {
					sprintf(buf, "1.0");
				} else if (d == 22) {
					sprintf(buf, "0.5");
				} else if (d >= 23 && d < 32) {
					/* d / 3 picks the swizzle, d % 3 the source. */
					d -= 23;
					switch (d / 3) {
					case 0:
						sprintf(buf, "%s.yzx",
							srcc[d % 3]);
						break;
					case 1:
						sprintf(buf, "%s.zxy",
							srcc[d % 3]);
						break;
					case 2:
						sprintf(buf, "%s.Wzy",
							srcc[d % 3]);
						break;
					}
				} else {
					sprintf(buf, "%i", d);
				}

				sprintf(argc[j], "%s%s%s%s",
					(regc & 32) ? "-" : "",
					(regc & 64) ? "|" : "",
					buf, (regc & 64) ? "|" : "");

				d = rega & 31;
				if (d < 9) {
					/* d / 3 is the source, d % 3 the component. */
					sprintf(buf, "%s.%c", srcc[d / 3],
						'x' + (char)(d % 3));
				} else if (d < 12) {
					sprintf(buf, "%s.w", srca[d - 9]);
				} else if (d < 16) {
					switch(d) {
					case R300_ALU_ARGA_SRCP_X:
						sprintf(buf, "srcp.x");
						break;
					case R300_ALU_ARGA_SRCP_Y:
						sprintf(buf, "srcp.y");
						break;
					case R300_ALU_ARGA_SRCP_Z:
						sprintf(buf, "srcp.z");
						break;
					case R300_ALU_ARGA_SRCP_W:
						sprintf(buf, "srcp.w");
						break;
					}
				} else if (d == 16) {
					sprintf(buf, "0.0");
				} else if (d == 17) {
					sprintf(buf, "1.0");
				} else if (d == 18) {
					sprintf(buf, "0.5");
				} else {
					sprintf(buf, "%i", d);
				}

				sprintf(arga[j], "%s%s%s%s",
					(rega & 32) ? "-" : "",
					(rega & 64) ? "|" : "",
					buf, (rega & 64) ? "|" : "");
			}

			fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n"
				" w: %8s %8s %8s op: %08x\n",
				argc[0], argc[1], argc[2],
				code->alu.inst[i].rgb_inst,
				code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
				"NOP" : "",
				arga[0], arga[1],arga[2],
				code->alu.inst[i].alpha_inst);
		}
	}
}
/*
 * Authors:
 *   Ben Skeggs
 *   Jerome Glisse
 */
#ifndef __R300_FRAGPROG_H_
#define __R300_FRAGPROG_H_

#include "radeon_compiler.h"
#include "radeon_program.h"


/*
 * NOTE(review): defined outside this header; going by the name it presumably
 * builds the r300 hardware code for the program held by c -- confirm against
 * r300_fragprog_emit.c.
 */
extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);

/* Print a human-readable dump of the r300 hardware fragment program to stderr. */
extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user);

#endif
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. + * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs + * + * \author Jerome Glisse + */ + +#include "r300_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" +#include "r300_fragprog_swizzle.h" + + +struct r300_emit_state { + struct r300_fragment_program_compiler * compiler; + + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; +}; + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = emit->compiler; \ + struct r300_fragment_program_code *code = &c->code->code.r300 + +#define error(fmt, args...) 
do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + +static unsigned int get_msbs_alu(unsigned int bits) +{ + return (bits >> 6) & 0x7; +} + +/** + * @param lsbs The number of least significant bits + */ +static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) +{ + return (bits >> lsbs) & 0x15; +} + +#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) + +/** + * Mark a temporary register as used. + */ +static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) +{ + if (index > code->pixsize) + code->pixsize = index; +} + +static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) +{ + if (!src.Used) + return 0; + + if (src.File == RC_FILE_CONSTANT) { + return src.Index | (1 << 5); + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { + use_temporary(code, src.Index); + return src.Index & 0x1f; + } + + return 0; +} + + +static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTC_CND; + case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; + case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; + case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; + case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } +} + +static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTA_CND; + case RC_OPCODE_DP3: 
return R300_ALU_OUTA_DP4; + case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; + case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; + case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; + case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; + case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; + case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } +} + +/** + * Emit one paired ALU instruction into the next free slot of code->alu.inst[]. Returns 1 on success, 0 if max_alu_insts has already been reached. + */ +static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) +{ + int ip; + int j; + PROG_CODE; + + if (code->alu.length >= c->Base.max_alu_insts) { + error("Too many ALU instructions"); + return 0; + } + + ip = code->alu.length++; + + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); + + for(j = 0; j < 3; ++j) { /* one pass per source slot / argument */ + /* Set the RGB address */ + unsigned int src = use_source(code, inst->RGB.Src[j]); + unsigned int arg; + if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); + + code->alu.inst[ip].rgb_addr |= src << (6*j); /* source j is placed at bit offset 6*j */ + + /* Set the Alpha address */ + src = use_source(code, inst->Alpha.Src[j]); + if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); + + code->alu.inst[ip].alpha_addr |= src << (6*j); + + arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + code->alu.inst[ip].rgb_inst |= arg << (7*j); /* argument j at bit offset 7*j: swizzle code, negate (bit 5), abs (bit 6) */ + + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg
|= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].alpha_inst |= arg << (7*j); + } + + /* Presubtract: OR the selected SRCP operation into the RGB and alpha instruction words */ + if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } + + if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } + + if (inst->RGB.Saturate) + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; + + if (inst->RGB.WriteMask) { /* RGB result is written to a temporary register */ + use_temporary(code, inst->RGB.DestIndex); + if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; + code->alu.inst[ip].rgb_addr |= + ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { /* RGB result is written to an output target */ + code->alu.inst[ip].rgb_addr |= + (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | + R300_RGB_TARGET(inst->RGB.Target); + emit->node_flags |= R300_RGBA_OUT; + } + + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + if (inst->Alpha.DestIndex >=
R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; + code->alu.inst[ip].alpha_addr |= + ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | + R300_ALPHA_TARGET(inst->Alpha.Target); + emit->node_flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; + c->code->writes_depth = 1; + } + if (inst->Nop) + code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; + + return 1; +} + + +/** + * Finish the current node without advancing to the next one: record its ALU/TEX ranges in code->code_addr[emit->current_node]. Returns 1 on success, 0 on error. + */ +static int finish_node(struct r300_emit_state * emit) +{ + struct r300_fragment_program_compiler * c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; + unsigned alu_offset; + unsigned alu_end; + unsigned tex_offset; + unsigned tex_end; + + unsigned int alu_offset_msbs, alu_end_msbs; + + if (code->alu.length == emit->node_first_alu) { + /* Generate a single NOP for this node */ + struct rc_pair_instruction inst; + memset(&inst, 0, sizeof(inst)); + if (!emit_alu(emit, &inst)) + return 0; + } + + alu_offset = emit->node_first_alu; + alu_end = code->alu.length - alu_offset - 1; /* last ALU instruction of the node, relative to alu_offset */ + tex_offset = emit->node_first_tex; + tex_end = code->tex.length - tex_offset - 1; /* wraps around when the node has no TEX instructions; corrected below */ + + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); + return 0; + } + + tex_end = 0; + } else { + if (emit->current_node == 0) + code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; + } + + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register.
*/ + code->code_addr[emit->current_node] = + ((alu_offset << R300_ALU_START_SHIFT) + & R300_ALU_START_MASK) + | ((alu_end << R300_ALU_SIZE_SHIFT) + & R300_ALU_SIZE_MASK) + | ((tex_offset << R300_TEX_START_SHIFT) + & R300_TEX_START_MASK) + | ((tex_end << R300_TEX_SIZE_SHIFT) + & R300_TEX_SIZE_MASK) + | emit->node_flags + | (get_msbs_tex(tex_offset, 5) + << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 5) + << R400_TEX_SIZE_MSB_SHIFT) + ; + + /* Write r400 extended instruction fields. These will be ignored on + * r300 cards. */ + alu_offset_msbs = get_msbs_alu(alu_offset); + alu_end_msbs = get_msbs_alu(alu_end); + switch(emit->current_node) { /* node N uses the START/SIZE MSB fields numbered 3-N */ + case 0: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START3_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; + break; + case 1: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START2_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; + break; + case 2: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START1_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; + break; + case 3: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START0_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; + break; + } + return 1; +} + + +/** + * Begin a block of texture instructions. + * Create the necessary indirection.
+ */ +static int begin_tex(struct r300_emit_state * emit) +{ + PROG_CODE; + + if (code->alu.length == emit->node_first_alu && + code->tex.length == emit->node_first_tex) { + return 1; /* nothing emitted in the current node yet: no new node is needed */ + } + + if (emit->current_node == 3) { /* current_node is never advanced past 3 */ + error("Too many texture indirections"); + return 0; + } + + if (!finish_node(emit)) + return 0; + + emit->current_node++; + emit->node_first_tex = code->tex.length; /* the new node starts at the current end of both instruction streams */ + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; + return 1; +} + + +static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) +{ /* Appends one texture instruction word to code->tex.inst[]; returns 1 on success, 0 on error. */ + unsigned int unit; + unsigned int dest; + unsigned int opcode; + PROG_CODE; + + if (code->tex.length >= emit->compiler->Base.max_tex_insts) { + error("Too many TEX instructions"); + return 0; + } + + unit = inst->U.I.TexSrcUnit; + dest = inst->U.I.DstReg.Index; + + switch(inst->U.I.Opcode) { + case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); + return 0; + } + + if (inst->U.I.Opcode == RC_OPCODE_KIL) { + unit = 0; + dest = 0; /* KIL is encoded with unit 0 and destination 0, and marks no destination temporary as used */ + } else { + use_temporary(code, dest); + } + + use_temporary(code, inst->U.I.SrcReg[0].Index); + + code->tex.inst[code->tex.length++] = + ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) + & R300_SRC_ADDR_MASK) + | ((dest << R300_DST_ADDR_SHIFT) + & R300_DST_ADDR_MASK) + | (unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT) + | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? + R400_SRC_ADDR_EXT_BIT : 0) + | (dest >= R300_PFS_NUM_TEMP_REGS ? + R400_DST_ADDR_EXT_BIT : 0) + ; + return 1; +} + + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions.
+ */ +void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r300_emit_state emit; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + unsigned int tex_end; + + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; + + memset(code, 0, sizeof(struct r300_fragment_program_code)); + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + begin_tex(&emit); + continue; + } + + emit_tex(&emit, inst); + } else { + emit_alu(&emit, &inst->U.P); /* NOTE(review): the return values of begin_tex/emit_tex/emit_alu are ignored; presumably their failures set Base.Error, which the loop condition checks - confirm */ + } + } + + if (code->pixsize >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); + + if (compiler->Base.Error) + return; + + /* Finish the program */ + finish_node(&emit); /* NOTE(review): return value is not checked here */ + + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + + /* Set r400 extended instruction fields. These values will be ignored + * on r300 cards. */ + code->r400_code_offset_ext |= + (get_msbs_alu(0) + << R400_ALU_OFFSET_MSB_SHIFT) + | (get_msbs_alu(code->alu.length - 1) + << R400_ALU_SIZE_MSB_SHIFT); + + tex_end = code->tex.length ?
code->tex.length - 1 : 0; + code->code_offset = + ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) + & R300_PFS_CNTL_ALU_OFFSET_MASK) + | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) + & R300_PFS_CNTL_ALU_END_MASK) + | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) + & R300_PFS_CNTL_TEX_OFFSET_MASK) + | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) + & R300_PFS_CNTL_TEX_END_MASK) + | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) + ; + + if (emit.current_node < 3) { /* fewer than four nodes used: move the used code_addr[] entries to the highest indices and zero the rest */ + int shift = 3 - emit.current_node; + int i; + for(i = emit.current_node; i >= 0; --i) /* copy from the top down so no entry is overwritten before it is moved */ + code->code_addr[shift + i] = code->code_addr[i]; + for(i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } + + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS + || code->alu.length > R300_PFS_MAX_ALU_INST + || code->tex.length > R300_PFS_MAX_TEX_INST) { + + code->r390_mode = 1; /* set when the program exceeds any of the R300_PFS_* limits tested above */ + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c new file mode 100644 index 00000000000..b7bca8c0cfa --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * Utilities to deal with the somewhat odd restriction on R300 fragment + * program swizzles. + */ + +#include "r300_fragprog_swizzle.h" + +#include + +#include "../r300_reg.h" +#include "radeon_compiler.h" + +#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) + +struct swizzle_data { + unsigned int hash; /**< swizzle value this matches */ + unsigned int base; /**< base value for hw swizzle */ + unsigned int stride; /**< difference in base between arg0/1/2 */ + unsigned int srcp_stride; /**< difference in base between arg0/srcp; 0 is treated as "no srcp encoding" by the users in this file */ +}; + +static const struct swizzle_data native_swizzles[] = { /* entry layout: hash, base, stride, srcp_stride */ + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0} +}; + +static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); + +/** + * Find a native RGB swizzle that matches the given swizzle. + * Returns 0 if none found.
+ */ +static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) +{ + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data* sd = &native_swizzles[i]; + for(comp = 0; comp < 3; ++comp) { + unsigned int swz = GET_SWZ(swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; /* unused components match any table entry */ + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) /* all three components matched or were unused */ + return sd; + } + + return 0; +} + +/** + * Determines if the given swizzle is valid for r300/r400. In most situations + * it is better to use r300_swizzle_is_native() which can be accessed via + * struct radeon_compiler *c; c->SwizzleCaps->IsNative(). + */ +int r300_swizzle_is_native_basic(unsigned int swizzle) +{ + if(lookup_native_swizzle(swizzle)) + return 1; + else + return 0; +} + +/** + * Check whether the given instruction supports the swizzle and negate + * combinations in the given source register. + */ +static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + const struct swizzle_data* sd; + unsigned int relevant; + int j; + + if (opcode == RC_OPCODE_KIL || + opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP) { + if (reg.Abs || reg.Negate) + return 0; /* these opcodes accept no source modifiers */ + + for(j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(reg.Swizzle, j); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != j) + return 0; /* ... and only the identity swizzle on the used components */ + } + + return 1; + } + + relevant = 0; + + for(j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) + relevant |= 1 << j; + + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; /* negate must cover all used XYZ components or none of them */ + + sd = lookup_native_swizzle(reg.Swizzle); + if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) + return 0; + + return 1; +} + + +static void r300_swizzle_split( + struct rc_src_register src, unsigned int mask, + struct rc_swizzle_split * split) +{ + split->NumPhases = 0; /* Greedy split: each phase takes the largest set of masked XYZ components that one native swizzle covers with a uniform negate bit. */ + + while(mask) { + unsigned int best_matchcount = 0; + unsigned int best_matchmask = 0; + int i, comp; +
+ for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + unsigned int matchcount = 0; + unsigned int matchmask = 0; + for(comp = 0; comp < 3; ++comp) { + unsigned int swz; + if (!GET_BIT(mask, comp)) + continue; + swz = GET_SWZ(src.Swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (mask & RC_MASK_XYZ)) + break; /* every remaining XYZ component is covered: no better entry exists */ + } + } + + if (mask & RC_MASK_W) + best_matchmask |= RC_MASK_W; /* W is always taken by the current phase */ + + split->Phase[split->NumPhases++] = best_matchmask; + mask &= ~best_matchmask; /* NOTE(review): if nothing matched (e.g. a masked component is RC_SWIZZLE_UNUSED) and W is not in mask, mask is unchanged and the loop does not terminate - confirm callers cannot pass such input */ + } +} + +struct rc_swizzle_caps r300_swizzle_caps = { + .IsNative = r300_swizzle_is_native, + .Split = r300_swizzle_split +}; + + +/** + * Translate an RGB (XYZ) swizzle into the hardware code for the given + * instruction source. + */ +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) +{ + const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + + if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { + fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); + return 0; /* falls back to hardware code 0 after printing the diagnostic */ + } + + if (src == RC_PAIR_PRESUB_SRC) { + return sd->base + sd->srcp_stride; + } else { + return sd->base + src*sd->stride; + } +} + + +/** + * Translate an Alpha (W) swizzle into the hardware code for the given + * instruction source.
+ */ +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) +{ + unsigned int swz = GET_SWZ(swizzle, 0); /* only swizzle component 0 selects the alpha input */ + if (src == RC_PAIR_PRESUB_SRC) { + return R300_ALU_ARGA_SRCP_X + swz; + } + if (swz < 3) + return swz + 3*src; /* swizzle values 0..2: the code is 3*src + swz */ + + switch(swz) { + case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; + default: return R300_ALU_ARGA_ONE; /* any other swizzle value falls back to ONE */ + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h new file mode 100644 index 00000000000..f2635be140d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_SWIZZLE_H_ +#define __R300_FRAGPROG_SWIZZLE_H_ + +#include "radeon_swizzle.h" + +extern struct rc_swizzle_caps r300_swizzle_caps; + +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); +int r300_swizzle_is_native_basic(unsigned int swizzle); + +#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c new file mode 100644 index 00000000000..bb6c010e8e3 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,172 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include + +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" +#include "radeon_program_alu.h" +#include "radeon_program_tex.h" +#include "radeon_rename_regs.h" +#include "radeon_remove_constants.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void dataflow_outputs_mark_use(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) +{ /* Reports the four color outputs (all channels) and the depth output (W only) to callback. */ + struct r300_fragment_program_compiler * c = userdata; + callback(data, c->OutputColor[0], RC_MASK_XYZW); + callback(data, c->OutputColor[1], RC_MASK_XYZW); + callback(data, c->OutputColor[2], RC_MASK_XYZW); + callback(data, c->OutputColor[3], RC_MASK_XYZW); + callback(data, c->OutputDepth, RC_MASK_W); +} + +static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) +{ /* Rewrites every instruction that writes the depth output so that it writes W instead of Z. */ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct rc_instruction *rci; + + for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { + struct rc_sub_instruction * inst = &rci->U.I; + unsigned i; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); + + if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) + continue; + + if (inst->DstReg.WriteMask & RC_MASK_Z) { + inst->DstReg.WriteMask = RC_MASK_W; /* a Z write to the depth output becomes a W-only write */ + } else { + inst->DstReg.WriteMask = 0; /* no Z write: the instruction no longer writes anything */ + continue; + } + + if (!info->IsComponentwise) { + continue; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); /* presumably makes every channel read what Z used to read, so the W write gets the old Z value - confirm lmul_swizzle semantics */ + } + } +} + +static int
radeon_saturate_output( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + + if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT) + return 0; /* not a write to an output register: instruction left unchanged */ + + inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; /* every output write gets the ZERO_ONE saturate mode */ + return 1; +} + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + int is_r500 = c->Base.is_r500; + int opt = !c->Base.disable_optimizations; + int sat_out = c->state.frag_clamp; /* predicate of the "saturate output writes" pass below */ + + /* Lists of instruction transformations; each list ends with a { 0, 0 } entry. */ + struct radeon_program_transformation saturate_output[] = { + { &radeon_saturate_output, c }, + { 0, 0 } + }; + + struct radeon_program_transformation rewrite_tex[] = { + { &radeonTransformTEX, c }, + { 0, 0 } + }; + + struct radeon_program_transformation rewrite_if[] = { + { &r500_transform_IF, 0 }, + {0, 0} + }; + + struct radeon_program_transformation native_rewrite_r500[] = { + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation native_rewrite_r300[] = { + { &radeonTransformALU, 0 }, + { &r300_transform_trig_simple, 0 }, + { 0, 0 } + }; + + /* List of compiler passes. */ + struct radeon_compiler_pass fs_list[] = { + /* NAME DUMP PREDICATE FUNCTION PARAM */ + {"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL}, + /* This transformation needs to be done before any of the IF + * instructions are modified.
*/ + {"transform KILP", 1, 1, rc_transform_KILP, NULL}, + {"unroll loops", 1, is_r500, rc_unroll_loops, NULL}, + {"transform loops", 1, !is_r500, rc_transform_loops, NULL}, + {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, + {"saturate output writes", 1, sat_out, rc_local_transform, saturate_output}, + {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, + {"transform IF", 1, is_r500, rc_local_transform, rewrite_if}, + {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, + {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, + {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use}, + {"emulate loops", 1, !is_r500, rc_emulate_loops, NULL}, + {"dataflow optimize", 1, opt, rc_optimize, NULL}, + {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + /* This pass makes it easier for the scheduler to group TEX + * instructions and reduces the chances of creating too + * many texture indirections.*/ + {"register rename", 1, !is_r500, rc_rename_regs, NULL}, + {"pair translate", 1, 1, rc_pair_translate, NULL}, + {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, + {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, + {"register allocation", 1, 1, rc_pair_regalloc, &opt}, + {"final code validation", 0, 1, rc_validate_final_shader, NULL}, + {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, + {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, + {"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL}, + {"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL}, + {NULL, 0, 0, NULL, NULL} + }; + + c->Base.type = RC_FRAGMENT_PROGRAM; + c->Base.SwizzleCaps = c->Base.is_r500 ? 
&r500_swizzle_caps : &r300_swizzle_caps; + + rc_run_compiler(&c->Base, fs_list); + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c new file mode 100644 index 00000000000..654f9a070d5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,1045 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "radeon_compiler.h" + +#include + +#include "../r300_reg.h" + +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_program_alu.h" +#include "radeon_swizzle.h" +#include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" +#include "radeon_remove_constants.h" + +struct loop { + int BgnLoop; + +}; + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(vpi->SrcReg[x].File), \ + RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + + +static unsigned long t_dst_mask(unsigned int mask) +{ + /* RC_MASK_* is equivalent to VSF_FLAG_* */ + return mask & RC_MASK_XYZW; +} + +static unsigned long t_dst_class(rc_register_file file) +{ + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case RC_FILE_OUTPUT: + return PVS_DST_REG_OUT; + case RC_FILE_ADDRESS: + return PVS_DST_REG_A0; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) +{ + if (dst->File == RC_FILE_OUTPUT) + return vp->outputs[dst->Index]; /* output indices are remapped through vp->outputs[] */ + + return dst->Index; +} + +static unsigned long t_src_class(rc_register_file file) +{ + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_NONE: + case RC_FILE_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case RC_FILE_INPUT: + return PVS_SRC_REG_INPUT; + case RC_FILE_CONSTANT: + return PVS_SRC_REG_CONSTANT; + } +} + +static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) +{ /* Returns 1 when a and b share a non-temporary source class and either uses relative addressing or their indices differ; 0 otherwise. */ + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); + + if (aclass != bclass) + return 0; + if
(aclass == PVS_SRC_REG_TEMPORARY) + return 0; + + if (a.RelAddr || b.RelAddr) + return 1; + if (a.Index != b.Index) + return 1; + + return 0; +} + +static inline unsigned long t_swizzle(unsigned int swizzle) +{ + /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + if (src->File == RC_FILE_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; /* input indices are remapped through vp->inputs[] */ + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | + (src->RelAddr << 4) | (src->Abs << 3); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + /* Scalar form: swizzle component 0 is replicated into all four channels, + * and any set bit in src->Negate negates all four channels (RC_MASK_XYZW), otherwise none. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ?
RC_MASK_XYZW : RC_MASK_NONE) | + (src->RelAddr << 4) | (src->Abs << 3); +} + +static int valid_dst(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) +{ + if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { + return 0; /* outputs[] entry is -1: presumably an output without a hardware slot - confirm */ + } else if (dst->File == RC_FILE_ADDRESS) { + assert(dst->Index == 0); + } + + return 1; +} + +static void ei_vector1(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ /* One-source op: the two remaining source words are ZERO-swizzled copies of source 0. */ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void ei_vector2(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ /* Two-source op: the third source word is a ZERO-swizzled copy of source 1. */ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = __CONST(1, RC_SWIZZLE_ZERO); +} + +static void ei_math1(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ /* Like ei_vector1, but passes 1 as the second PVS_OP_DST_OPERAND argument and uses the scalar source form. */ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void ei_lit(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); +
/* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); +} + +static void ei_mad(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + unsigned int i; + /* Remarks about hardware limitations of MAD + * (please preserve this comment, as this information is _NOT_ + * in the documentation provided by AMD). + * + * As described in the documentation, MAD with three unique temporary + * source registers requires the use of the macro version. + * + * However (and this is not mentioned in the documentation), apparently + * the macro version is _NOT_ a full superset of the normal version. + * In particular, the macro version does not always work when relative + * addressing is used in the source operands. 
+ * + * This limitation caused incorrect rendering in Sauerbraten's OpenGL + * assembly shader path when using medium quality animations + * (i.e. animations with matrix blending instead of quaternion blending). + * + * Unfortunately, I (nha) have been unable to extract a Piglit regression + * test for this issue - for some reason, it is possible to have vertex + * programs whose prefix is *exactly* the same as the prefix of the + * offending program in Sauerbraten up to the offending instruction + * without causing any trouble. + * + * Bottom line: Only use the macro version only when really necessary; + * according to AMD docs, this should improve performance by one clock + * as a nice side bonus. + */ + if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && + vpi->SrcReg[1].File == RC_FILE_TEMPORARY && + vpi->SrcReg[2].File == RC_FILE_TEMPORARY && + vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && + vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && + vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + 0, + 1, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } else { + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + /* Arguments with constant swizzles still count as a unique + * temporary, so we should make sure these arguments share a + * register index with one of the other arguments. 
*/ + for (i = 0; i < 3; i++) { + unsigned int j; + if (vpi->SrcReg[i].File != RC_FILE_NONE) + continue; + + for (j = 0; j < 3; j++) { + if (i != j) { + vpi->SrcReg[i].Index = + vpi->SrcReg[j].Index; + break; + } + } + } + } + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); +} + +static void ei_pow(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); +} + +static void mark_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned int * writemasks = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= R300_VS_MAX_TEMPS) + return; + + writemasks[index] |= mask; +} + +static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler) +{ + return PVS_SRC_OPERAND(compiler->PredicateIndex, + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_W), + t_src_class(RC_FILE_TEMPORARY), + 0); +} + +static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler, + unsigned int hw_opcode, int is_math) +{ + return PVS_OP_DST_OPERAND(hw_opcode, + is_math, + 0, + compiler->PredicateIndex, + RC_MASK_W, + t_dst_class(RC_FILE_TEMPORARY)); + +} + +static void ei_if(struct r300_vertex_program_compiler * compiler, + struct rc_instruction *rci, + unsigned int * inst, + unsigned int branch_depth) +{ + unsigned int predicate_opcode; + int is_math = 0; + + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode IF not supported\n"); + return; + } + + /* Reserve a temporary to use as our predicate stack 
counter, if we + * don't already have one. */ + if (!compiler->PredicateMask) { + unsigned int writemasks[RC_REGISTER_MAX_INDEX]; + struct rc_instruction * inst; + unsigned int i; + memset(writemasks, 0, sizeof(writemasks)); + for(inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions; + inst = inst->Next) { + rc_for_all_writes_mask(inst, mark_write, writemasks); + } + for(i = 0; i < compiler->Base.max_temp_regs; i++) { + unsigned int mask = ~writemasks[i] & RC_MASK_XYZW; + /* Only the W component can be used for the predicate + * stack counter. */ + if (mask & RC_MASK_W) { + compiler->PredicateMask = RC_MASK_W; + compiler->PredicateIndex = i; + break; + } + } + if (i == compiler->Base.max_temp_regs) { + rc_error(&compiler->Base, "No free temporary to use for" + " predicate stack counter.\n"); + return; + } + } + predicate_opcode = + branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ; + + rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0)); + if (branch_depth == 0) { + is_math = 1; + predicate_opcode = ME_PRED_SET_NEQ; + inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + inst[2] = 0; + } else { + predicate_opcode = VE_PRED_SET_NEQ_PUSH; + inst[1] = t_pred_src(compiler); + inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + } + + inst[0] = t_pred_dst(compiler, predicate_opcode, is_math); + inst[3] = 0; + +} + +static void ei_else(struct r300_vertex_program_compiler * compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ELSE not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} + +static void ei_endif(struct r300_vertex_program_compiler *compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ENDIF not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
+ inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} + +static void translate_vertex_program(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; + struct rc_instruction *rci; + + struct loop * loops = NULL; + int current_loop_depth = 0; + int loops_reserved = 0; + + unsigned int branch_depth = 0; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + compiler->code->num_temporaries = 0; + + compiler->SetHwInputOutput(compiler); + + for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { + struct rc_sub_instruction *vpi = &rci->U.I; + unsigned int *inst = compiler->code->body.d + compiler->code->length; + const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode); + + /* Skip instructions writing to non-existing destination */ + if (!valid_dst(compiler->code, &vpi->DstReg)) + continue; + + if (info->HasDstReg) { + /* Neither is Saturate. 
*/ + if (vpi->SaturateMode != RC_SATURATE_NONE) { + rc_error(&compiler->Base, "Vertex program does not support the Saturate " + "modifier (yet).\n"); + } + } + + if (compiler->code->length >= c->max_alu_insts * 4) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; + } + + assert(compiler->Base.is_r500 || + (vpi->Opcode != RC_OPCODE_SEQ && + vpi->Opcode != RC_OPCODE_SNE)); + + switch (vpi->Opcode) { + case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; + case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case RC_OPCODE_ELSE: ei_else(compiler, inst); break; + case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break; + case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break; + case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case 
RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break; + case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; + case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; + case RC_OPCODE_BGNLOOP: + { + struct loop * l; + + if ((!compiler->Base.is_r500 + && loops_reserved >= R300_VS_MAX_LOOP_DEPTH) + || loops_reserved >= R500_VS_MAX_FC_DEPTH) { + rc_error(&compiler->Base, + "Loops are nested too deep."); + return; + } + memory_pool_array_reserve(&compiler->Base.Pool, + struct loop, loops, current_loop_depth, + loops_reserved, 1); + l = &loops[current_loop_depth++]; + memset(l , 0, sizeof(struct loop)); + l->BgnLoop = (compiler->code->length / 4); + continue; + } + case RC_OPCODE_ENDLOOP: + { + struct loop * l; + unsigned int act_addr; + unsigned int last_addr; + unsigned int ret_addr; + + assert(loops); + l = &loops[current_loop_depth - 1]; + act_addr = l->BgnLoop - 1; + last_addr = (compiler->code->length / 4) - 1; + ret_addr = l->BgnLoop; + + /* Every ENDLOOP consumes one flow control slot, indexed by + * num_fc_ops below; that counter (not the capacity of the + * loop nesting array) is what has to stay under the limit. */ + if (compiler->code->num_fc_ops >= R300_VS_MAX_FC_OPS) { + rc_error(&compiler->Base, + "Too many flow control instructions."); + return; + } + if (compiler->Base.is_r500) { + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].lw = + R500_PVS_FC_ACT_ADRS(act_addr) + | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff) + ; + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].uw = + R500_PVS_FC_LAST_INST(last_addr) + | R500_PVS_FC_RTN_INST(ret_addr) + ; + } else { + compiler->code->fc_op_addrs.r300 + [compiler->code->num_fc_ops] = + R300_PVS_FC_ACT_ADRS(act_addr) + | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) + |
R300_PVS_FC_LAST_INST(last_addr) + | R300_PVS_FC_RTN_INST(ret_addr) + ; + } + compiler->code->fc_loop_index[compiler->code->num_fc_ops] = + R300_PVS_FC_LOOP_INIT_VAL(0x0) + | R300_PVS_FC_LOOP_STEP_VAL(0x1) + ; + compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( + compiler->code->num_fc_ops); + compiler->code->num_fc_ops++; + current_loop_depth--; + continue; + } + + default: + rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name); + return; + } + + /* Non-flow control instructions that are inside an if statement + * need to pay attention to the predicate bit. */ + if (branch_depth + && vpi->Opcode != RC_OPCODE_IF + && vpi->Opcode != RC_OPCODE_ELSE + && vpi->Opcode != RC_OPCODE_ENDIF) { + + inst[0] |= (PVS_DST_PRED_ENABLE_MASK + << PVS_DST_PRED_ENABLE_SHIFT); + inst[0] |= (PVS_DST_PRED_SENSE_MASK + << PVS_DST_PRED_SENSE_SHIFT); + } + + /* Update the number of temporaries. */ + if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY && + vpi->DstReg.Index >= compiler->code->num_temporaries) + compiler->code->num_temporaries = vpi->DstReg.Index + 1; + + for (unsigned i = 0; i < info->NumSrcRegs; i++) + if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY && + vpi->SrcReg[i].Index >= compiler->code->num_temporaries) + compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1; + + if (compiler->PredicateMask) + if (compiler->PredicateIndex >= compiler->code->num_temporaries) + compiler->code->num_temporaries = compiler->PredicateIndex + 1; + + if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) { + rc_error(&compiler->Base, "Too many temporaries.\n"); + return; + } + + compiler->code->length += 4; + + if (compiler->Base.Error) + return; + } +} + +struct temporary_allocation { + unsigned int Allocated:1; + unsigned int HwTemp:15; + struct rc_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; + 
struct rc_instruction *inst; + struct rc_instruction *end_loop = NULL; + unsigned int num_orig_temps = 0; + char hwtemps[RC_REGISTER_MAX_INDEX]; + struct temporary_allocation * ta; + unsigned int i, j; + + memset(hwtemps, 0, sizeof(hwtemps)); + + rc_recompute_ips(c); + + /* Pass 1: Count original temporaries. */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + if (inst->U.I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->U.I.SrcReg[i].Index + 1; + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + if (inst->U.I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->U.I.DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + /* Instructions inside of loops need to use the ENDLOOP + * instruction as their LastRead. 
*/ + if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + int endloops = 1; + struct rc_instruction * ptr; + for(ptr = inst->Next; + ptr != &compiler->Base.Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + endloops++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + endloops--; + if (endloops <= 0) { + end_loop = ptr; + break; + } + } + } + } + + if (inst == end_loop) { + end_loop = NULL; + continue; + } + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst; + } + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = 0; + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < c->max_temp_regs; ++j) { + if (!hwtemps[j]) + break; + } + ta[orig].Allocated = 1; + ta[orig].HwTemp = j; + hwtemps[ta[orig].HwTemp] = 1; + } + + inst->U.I.DstReg.Index = ta[orig].HwTemp; + } + } + } +} + +/** + * R3xx-R4xx vertex engine does not support the Absolute source operand modifier + * and the Saturate opcode modifier. Only Absolute is currently transformed. + */ +static int transform_nonnative_modifiers( + struct radeon_compiler *c, + struct rc_instruction *inst, + void* unused) +{ + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + /* Transform ABS(a) to MAX(a, -a). 
*/ + for (i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].Abs) { + struct rc_instruction *new_inst; + unsigned temp; + + inst->U.I.SrcReg[i].Abs = 0; + + temp = rc_find_free_temporary(c); + + new_inst = rc_insert_new_instruction(c, inst->Prev); + new_inst->U.I.Opcode = RC_OPCODE_MAX; + new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + new_inst->U.I.DstReg.Index = temp; + new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + + memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = temp; + inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; + } + } + return 1; +} + +/** + * Vertex engine cannot read two inputs or two constants at the same time. + * Introduce intermediate MOVs to temporary registers to account for this. + */ +static int transform_source_conflicts( + struct radeon_compiler *c, + struct rc_instruction* inst, + void* unused) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->NumSrcRegs == 3) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) + || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + + reset_srcreg(&inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[2].Index = tmpreg; + } + } + + if (opcode->NumSrcRegs >= 2) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + 
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + + reset_srcreg(&inst->U.I.SrcReg[1]); + inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[1].Index = tmpreg; + } + } + + return 1; +} + +static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c; + int i; + + for(i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1 << i)) && + !(compiler->Base.Program.OutputsWritten & (1 << i))) { + struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = i; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; + inst->U.I.SrcReg[0].Index = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler->Base.Program.OutputsWritten |= 1 << i; + } + } +} + +static void dataflow_outputs_mark_used(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) +{ + struct r300_vertex_program_compiler * c = userdata; + int i; + + for(i = 0; i < 32; ++i) { + if (c->RequiredOutputs & (1 << i)) + callback(data, i, RC_MASK_XYZW); + } +} + +static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + (void) opcode; + (void) reg; + + return 1; +} + +static void transform_negative_addressing(struct r300_vertex_program_compiler *c, + struct rc_instruction *arl, + struct rc_instruction *end, + int min_offset) +{ + struct rc_instruction *inst, *add; + unsigned const_swizzle; + + /* Transform ARL */ + add = rc_insert_new_instruction(&c->Base, arl->Prev); + add->U.I.Opcode = RC_OPCODE_ADD; + add->U.I.DstReg.File = RC_FILE_TEMPORARY; + add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); + add->U.I.DstReg.WriteMask = RC_MASK_X; + 
add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; + add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, + min_offset, &const_swizzle); + add->U.I.SrcReg[1].Swizzle = const_swizzle; + + arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; + arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; + + /* Rewrite offsets up to and excluding inst. */ + for (inst = arl->Next; inst != end; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) + if (inst->U.I.SrcReg[i].RelAddr) + inst->U.I.SrcReg[i].Index -= min_offset; + } +} + +static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user) +{ + struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler; + struct rc_instruction *inst, *lastARL = NULL; + int min_offset = 0; + + for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (inst->U.I.Opcode == RC_OPCODE_ARL) { + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); + + lastARL = inst; + min_offset = 0; + continue; + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].RelAddr && + inst->U.I.SrcReg[i].Index < 0) { + /* ARL must precede any indirect addressing. 
*/ + if (lastARL == NULL) { + rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL."); + return; + } + + if (inst->U.I.SrcReg[i].Index < min_offset) + min_offset = inst->U.I.SrcReg[i].Index; + } + } + } + + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); +} + +static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { + .IsNative = &swizzle_is_native, + .Split = 0 /* should never be called */ +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) +{ + int is_r500 = c->Base.is_r500; + int opt = !c->Base.disable_optimizations; + + /* Lists of instruction transformations. */ + struct radeon_program_transformation alu_rewrite_r500[] = { + { &r300_transform_vertex_alu, 0 }, + { &r300_transform_trig_scale_vertex, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation alu_rewrite_r300[] = { + { &r300_transform_vertex_alu, 0 }, + { &r300_transform_trig_simple, 0 }, + { 0, 0 } + }; + + /* Note: These passes have to be done separately from ALU rewrite, + * otherwise non-native ALU instructions with source conflicts + * or non-native modifiers will not be treated properly. + */ + struct radeon_program_transformation emulate_modifiers[] = { + { &transform_nonnative_modifiers, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation resolve_src_conflicts[] = { + { &transform_source_conflicts, 0 }, + { 0, 0 } + }; + + /* List of compiler passes.
*/ + struct radeon_compiler_pass vs_list[] = { + /* NAME DUMP PREDICATE FUNCTION PARAM */ + {"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL}, + {"transform loops", 1, 1, rc_transform_loops, NULL}, + {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, + {"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL}, + {"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500}, + {"native rewrite", 1, !is_r500, rc_local_transform, alu_rewrite_r300}, + {"emulate modifiers", 1, !is_r500, rc_local_transform, emulate_modifiers}, + {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_used}, + {"dataflow optimize", 1, opt, rc_optimize, NULL}, + /* This pass must be done after optimizations. */ + {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, + {"register allocation", 1, opt, allocate_temporary_registers, NULL}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"final code validation", 0, 1, rc_validate_final_shader, NULL}, + {"machine code generation", 0, 1, translate_vertex_program, NULL}, + {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, + {NULL, 0, 0, NULL, NULL} + }; + + c->Base.type = RC_VERTEX_PROGRAM; + c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + + rc_run_compiler(&c->Base, vs_list); + + c->code->InputsRead = c->Base.Program.InputsRead; + c->code->OutputsWritten = c->Base.Program.OutputsWritten; + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c new file mode 100644 index 00000000000..2bc0a87eed8 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c @@ -0,0 +1,207 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "radeon_compiler.h" +#include "radeon_code.h" +#include "../r300_reg.h" + +#include + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_COND_MUX_EQ", + " VE_COND_MUX_GT", + " VE_COND_MUX_GTE", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +/* XXX refactor to avoid clashing symbols */ +static char* r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", +}; + +static char* r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", +}; + +static char* r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", +}; + + +static 
void r300_vs_op_dump(uint32_t op) +{ + fprintf(stderr, " dst: %d%s op: ", + (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) { + fprintf(stderr, "PRED %u", + (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1); + } + if (op & 0x80) { + if (op & 0x1) { + fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + +static void r300_vs_src_dump(uint32_t src) +{ + fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3], + src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], + src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], + src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], + src & (1 << 28) ? "-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); +} + +void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user) +{ + struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler; + struct r300_vertex_program_code * vs = c->code; + unsigned instrcount = vs->length / 4; + unsigned i; + + fprintf(stderr, "Final vertex program code:\n"); + + for(i = 0; i < instrcount; i++) { + unsigned offset = i*4; + unsigned src; + + fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); + r300_vs_op_dump(vs->body.d[offset]); + + for(src = 0; src < 3; ++src) { + fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); + r300_vs_src_dump(vs->body.d[offset+1+src]); + } + } + + fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); + for(i = 0; i < vs->num_fc_ops; i++) { + switch((vs->fc_ops >> (i * 2)) & 0x3 ) { + case 0: fprintf(stderr, "NOP"); break; + case 1: fprintf(stderr, "JUMP"); break; + case 2: fprintf(stderr, "LOOP"); break; + case 3: fprintf(stderr, "JSR"); break; + } + if 
(c->Base.is_r500) { + fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n", + vs->fc_op_addrs.r500[i].uw, + vs->fc_op_addrs.r500[i].lw); + } else { + fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c new file mode 100644 index 00000000000..cf99f5e4538 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.c @@ -0,0 +1,539 @@ +/* + * Copyright 2008 Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r500_fragprog.h" + +#include + +#include "radeon_compiler_util.h" +#include "radeon_list.h" +#include "radeon_variable.h" +#include "../r300_reg.h" + +/** + * Rewrite IF instructions to use the ALU result special register. 
+ */ +int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst_if, + void *data) +{ + struct rc_variable * writer; + struct rc_list * writer_list, * list_ptr; + struct rc_list * var_list = rc_get_variables(c); + unsigned int generic_if = 0; + unsigned int alu_chan; + + if (inst_if->U.I.Opcode != RC_OPCODE_IF) { + return 0; + } + + writer_list = rc_variable_list_get_writers( + var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); + if (!writer_list) { + generic_if = 1; + } else { + + /* Make sure it is safe for the writers to write to + * ALU Result */ + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + struct rc_instruction * inst; + writer = list_ptr->Item; + /* We are going to modify the destination register + * of writer, so if it has a reader other than + * inst_if (aka ReaderCount > 1) we must fall back to + * our generic IF. + * If the writer has a lower IP than inst_if, this + * means that inst_if is above the writer in a loop. + * I'm not sure why this would ever happen, but + * if it does we want to make sure we fall back + * to our generic IF. */ + if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) { + generic_if = 1; + break; + } + + /* The ALU Result is not preserved across IF + * instructions, so if there is another IF + * instruction between writer and inst_if, then + * we need to fall back to generic IF. 
*/ + for (inst = writer->Inst; inst != inst_if; inst = inst->Next) { + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + if (info->IsFlowControl) { + generic_if = 1; + break; + } + } + if (generic_if) { + break; + } + } + } + + if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) { + alu_chan = RC_ALURESULT_X; + } else { + alu_chan = RC_ALURESULT_W; + } + if (generic_if) { + struct rc_instruction * inst_mov = + rc_insert_new_instruction(c, inst_if->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.DstReg.File = RC_FILE_NONE; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.WriteALUResult = alu_chan; + inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + if (alu_chan == RC_ALURESULT_X) { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + } else { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z); + } + } else { + rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER; + unsigned int reverse_srcs = 0; + unsigned int preserve_opcode = 0; + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + writer = list_ptr->Item; + switch(writer->Inst->U.I.Opcode) { + case RC_OPCODE_SEQ: + compare_func = RC_COMPARE_FUNC_EQUAL; + break; + case RC_OPCODE_SNE: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + break; + case RC_OPCODE_SLE: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SGE: + compare_func = RC_COMPARE_FUNC_GEQUAL; + break; + case RC_OPCODE_SGT: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SLT: + compare_func = RC_COMPARE_FUNC_LESS; + break; + default: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + preserve_opcode = 1; + break; + } + if (!preserve_opcode) { + writer->Inst->U.I.Opcode = 
RC_OPCODE_SUB; + } + writer->Inst->U.I.DstReg.WriteMask = 0; + writer->Inst->U.I.DstReg.File = RC_FILE_NONE; + writer->Inst->U.I.WriteALUResult = alu_chan; + writer->Inst->U.I.ALUResultCompare = compare_func; + if (reverse_srcs) { + struct rc_src_register temp_src; + temp_src = writer->Inst->U.I.SrcReg[0]; + writer->Inst->U.I.SrcReg[0] = + writer->Inst->U.I.SrcReg[1]; + writer->Inst->U.I.SrcReg[1] = temp_src; + } + } + } + + inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE( + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + inst_if->U.I.SrcReg[0].Negate = 0; + + return 1; +} + +static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + unsigned int relevant; + int i; + + if (opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP || + opcode == RC_OPCODE_TXD || + opcode == RC_OPCODE_TXL || + opcode == RC_OPCODE_KIL) { + if (reg.Abs) + return 0; + + if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) + return 0; + + for(i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED) { + reg.Negate &= ~(1 << i); + continue; + } + if (swz >= 4) + return 0; + } + + if (reg.Negate) + return 0; + + return 1; + } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... 
*/ + if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) + return 1; + + return 0; + } else { + /* ALU instructions support almost everything */ + relevant = 0; + for(i = 0; i < 3; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; + + return 1; + } +} + +/** + * Split source register access. + * + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. + */ +static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, + struct rc_swizzle_split * split) +{ + unsigned int negatebase[2] = { 0, 0 }; + int i; + + for(i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(src.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) + continue; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; + } + + split->NumPhases = 0; + + for(i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; + + split->Phase[split->NumPhases++] = negatebase[i]; + } +} + +struct rc_swizzle_caps r500_swizzle_caps = { + .IsNative = r500_swizzle_is_native, + .Split = r500_swizzle_split +}; + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "H"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) 
+{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +void r500FragmentProgramDump(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r500_fragment_program_code *code = &compiler->code->code.r500; + int n, i; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case 
R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case R500_INST_TYPE_ALU: + case R500_INST_TYPE_OUT: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3, (inst >> 29) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? 
"(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 29) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case R500_INST_TYPE_FC: + fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + /* JUMP_FUNC JUMP_ANY*/ + fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff, + (inst & R500_FC_JUMP_ANY) >> 5); + + /* OP */ + switch(inst & 0x7){ + case R500_FC_OP_JUMP: + fprintf(stderr, "JUMP"); + break; + case R500_FC_OP_LOOP: + fprintf(stderr, "LOOP"); + break; + case R500_FC_OP_ENDLOOP: + fprintf(stderr, "ENDLOOP"); + break; + case R500_FC_OP_REP: + fprintf(stderr, "REP"); + break; + case R500_FC_OP_ENDREP: + fprintf(stderr, "ENDREP"); + break; + case R500_FC_OP_BREAKLOOP: + fprintf(stderr, "BREAKLOOP"); + break; + case R500_FC_OP_BREAKREP: + fprintf(stderr, "BREAKREP"); + break; + case R500_FC_OP_CONTINUE: + fprintf(stderr, "CONTINUE"); + break; + } + fprintf(stderr," "); + /* A_OP */ + switch(inst & (0x3 << 6)){ + case R500_FC_A_OP_NONE: + fprintf(stderr, "NONE"); + break; + case R500_FC_A_OP_POP: + fprintf(stderr, "POP"); + break; + case R500_FC_A_OP_PUSH: + fprintf(stderr, "PUSH"); + break; + } + /* B_OP0 B_OP1 */ + for(i=0; i<2; i++){ + fprintf(stderr, " "); + switch(inst & (0x3 << (24 + (i * 2)))){ + /* R500_FC_B_OP0_NONE + * R500_FC_B_OP1_NONE */ + case 0: + fprintf(stderr, "NONE"); + break; + case R500_FC_B_OP0_DECR: + case R500_FC_B_OP1_DECR: + fprintf(stderr, "DECR"); + break; + case 
R500_FC_B_OP0_INCR: + case R500_FC_B_OP1_INCR: + fprintf(stderr, "INCR"); + break; + } + } + /*POP_CNT B_ELSE */ + fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4); + inst = code->inst[n].inst3; + /* JUMP_ADDR */ + fprintf(stderr, " %d", inst >> 16); + + if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){ + fprintf(stderr, " IGN_UNC"); + } + inst = code->inst[n].inst3; + fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst); + fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n", + inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); + break; + case R500_INST_TYPE_TEX: + inst = code->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.h b/src/gallium/drivers/r300/compiler/r500_fragprog.h new file mode 100644 index 00000000000..6aa448cc6f7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + * Jerome Glisse + */ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + +extern void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); + +extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user); + +extern struct rc_swizzle_caps r500_swizzle_caps; + +extern int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst_if, + void* data); + +#endif diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c new file mode 100644 index 00000000000..c30cd753d15 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c @@ -0,0 +1,678 @@ +/* + * Copyright (C) 2005 Ben Skeggs. 
+ * + * Copyright 2008 Corbin Simpson + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs + * + * \author Jerome Glisse + * + * \author Corbin Simpson + * + */ + +#include "r500_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" + +#define PROG_CODE \ + struct r500_fragment_program_code *code = &c->code->code.r500 + +#define error(fmt, args...) 
do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +struct branch_info { + int If; + int Else; + int Endif; +}; + +struct r500_loop_info { + int BgnLoop; + + int BranchDepth; + int * Brks; + int BrkCount; + int BrkReserved; + + int * Conts; + int ContCount; + int ContReserved; +}; + +struct emit_state { + struct radeon_compiler * C; + struct r500_fragment_program_code * Code; + + struct branch_info * Branches; + unsigned int CurrentBranchDepth; + unsigned int BranchesReserved; + + struct r500_loop_info * Loops; + unsigned int CurrentLoopDepth; + unsigned int LoopsReserved; + + unsigned int MaxBranchDepth; + +}; + +static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND; + case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } +} + +static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_CND: return R500_ALPHA_OP_CND; + case RC_OPCODE_COS: return R500_ALPHA_OP_COS; + case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; + case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; + case 
RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; + case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; + case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; + case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; + } +} + +/** + * Convert a compiler swizzle selector to the value written into the + * instruction word: ZERO and UNUSED become 4, HALF becomes 5, ONE becomes 6. + * Any other selector is returned unchanged. + */ +static unsigned int fix_hw_swizzle(unsigned int swz) +{ + switch (swz) { + case RC_SWIZZLE_ZERO: + case RC_SWIZZLE_UNUSED: + swz = 4; + break; + case RC_SWIZZLE_HALF: + swz = 5; + break; + case RC_SWIZZLE_ONE: + swz = 6; + break; + } + + return swz; +} + +/** + * Pack RGB argument number "arg" of a pair instruction into one value: + * the Source field in the low bits, the three component selectors + * (after fix_hw_swizzle) at bit offsets 2, 5 and 8, Negate shifted left + * by 11 and Abs shifted left by 12. + */ +static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) +{ + unsigned int t = inst->RGB.Arg[arg].Source; + int comp; + t |= inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; + + /* Only the first three swizzle components are packed here. */ + for(comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + + return t; +} + +/** + * Pack alpha argument number "i" of a pair instruction into one value: + * the Source field in the low bits, swizzle component 0 (after + * fix_hw_swizzle) at bit offset 2, Negate shifted left by 5 and Abs + * shifted left by 6. + */ +static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) +{ + unsigned int t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; +} + +/** + * Map a compare function to the matching R500_INST_ALU_RESULT_OP_* value. + * Only EQUAL, LESS, GEQUAL and NOTEQUAL are handled; any other function + * is reported through rc_error() and 0 is returned. + */ +static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) +{ + switch(func) { + case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; + case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; + case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; + case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; + default: + rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); + return 0; + } +} +
+/** + * Note that register "index" is referenced: raise code->max_temp_idx to + * "index" when it is larger than the value recorded so far. + */ +static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + +/** + * Compute the address-field value for one source of a pair instruction. + * + * An unused source yields 1 << 7. A constant yields its index with + * R500_RGB_ADDR0_CONST set. A temporary or input yields its plain index + * and is also recorded through use_temporary(). Any other register file + * yields 0. + */ +static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) +{ + /* From docs: + * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. + * MSB = 1 << 7 */ + if (!src.Used) + return 1 << 7; + + if (src.File == RC_FILE_CONSTANT) { + return src.Index | R500_RGB_ADDR0_CONST; + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { + use_temporary(code, src.Index); + return src.Index; + } + + return 0; +} + +/** + * NOP the specified instruction if it is not a texture lookup. + * + * The instruction type is read from the low two bits of inst0; the NOP + * flag is OR-ed into the same word. + */ +static void alu_nop(struct r300_fragment_program_compiler *c, int ip) +{ + PROG_CODE; + + if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { + code->inst[ip].inst0 |= R500_INST_NOP; + } +} + +/** + * Emit a paired ALU instruction. + */ +static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) +{ + int ip; + PROG_CODE; + + /* inst_end is the index of the last emitted instruction, so stop once + * the next slot would reach max_alu_insts. */ + if (code->inst_end >= c->Base.max_alu_insts-1) { + error("emit_alu: Too many instructions"); + return; + } + + ip = ++code->inst_end; + + /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions.
*/ + if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || + inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { + if (ip > 0) { + alu_nop(c, ip - 1); + } + } + + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); + + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + if (inst->WriteALUResult) { + error("Cannot write output and ALU result at the same time"); + return; + } + } else { + code->inst[ip].inst0 = R500_INST_TYPE_ALU; + } + code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; + + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); + code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Nop) { + code->inst[ip].inst0 |= R500_INST_NOP; + } + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->code->writes_depth = 1; + } + + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + use_temporary(code, inst->Alpha.DestIndex); + use_temporary(code, inst->RGB.DestIndex); + + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + + /* Set the presubtract operation. 
*/ + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; + break; + default: + break; + } + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; + break; + default: + break; + } + + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + + if 
(inst->WriteALUResult) { + code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; + + if (inst->WriteALUResult == RC_ALURESULT_X) + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; + else + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; + + code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); + } +} + +static unsigned int translate_strq_swizzle(unsigned int swizzle) +{ + unsigned int swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; + return swiz; +} + +/** + * Emit a single TEX instruction + */ +static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) +{ + int ip; + PROG_CODE; + + if (code->inst_end >= c->Base.max_alu_insts-1) { + error("emit_tex: Too many instructions"); + return 0; + } + + ip = ++code->inst_end; + + code->inst[ip].inst0 = R500_INST_TYPE_TEX + | (inst->DstReg.WriteMask << 11) + | R500_INST_TEX_SEM_WAIT; + code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE; + + if (inst->TexSrcTarget == RC_TEXTURE_RECT) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; + + switch (inst->Opcode) { + case RC_OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case RC_OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case RC_OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case RC_OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + case RC_OPCODE_TXD: + code->inst[ip].inst1 |= R500_TEX_INST_DXDY; + break; + case RC_OPCODE_TXL: + code->inst[ip].inst1 |= R500_TEX_INST_LOD; + break; + default: + error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); + } + + use_temporary(code, inst->SrcReg[0].Index); + if (inst->Opcode != RC_OPCODE_KIL) + use_temporary(code, inst->DstReg.Index); + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) + | 
R500_TEX_DST_ADDR(inst->DstReg.Index) + | (GET_SWZ(inst->TexSwizzle, 0) << 24) + | (GET_SWZ(inst->TexSwizzle, 1) << 26) + | (GET_SWZ(inst->TexSwizzle, 2) << 28) + | (GET_SWZ(inst->TexSwizzle, 3) << 30) + ; + + if (inst->Opcode == RC_OPCODE_TXD) { + use_temporary(code, inst->SrcReg[1].Index); + use_temporary(code, inst->SrcReg[2].Index); + + /* DX and DY parameters are specified in a separate register. */ + code->inst[ip].inst3 = + R500_DX_ADDR(inst->SrcReg[1].Index) | + (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | + R500_DY_ADDR(inst->SrcReg[2].Index) | + (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); + } + + return 1; +} + +static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) +{ + unsigned int newip; + + if (s->Code->inst_end >= s->C->max_alu_insts-1) { + rc_error(s->C, "emit_tex: Too many instructions"); + return; + } + + newip = ++s->Code->inst_end; + + /* Currently all loops use the same integer constant to intialize + * the loop variables. 
*/ + if(!s->Code->int_constants[0]) { + s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); + s->Code->int_constant_count = 1; + } + s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; + + switch(inst->U.I.Opcode){ + struct branch_info * branch; + struct r500_loop_info * loop; + case RC_OPCODE_BGNLOOP: + memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, + s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); + + loop = &s->Loops[s->CurrentLoopDepth++]; + memset(loop, 0, sizeof(struct r500_loop_info)); + loop->BranchDepth = s->CurrentBranchDepth; + loop->BgnLoop = newip; + + s->Code->inst[newip].inst2 = R500_FC_OP_LOOP + | R500_FC_JUMP_FUNC(0x00) + | R500_FC_IGNORE_UNCOVERED + ; + break; + case RC_OPCODE_BRK: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, + loop->BrkCount, loop->BrkReserved, 1); + + loop->Brks[loop->BrkCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; + break; + + case RC_OPCODE_CONT: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, + loop->ContCount, loop->ContReserved, 1); + loop->Conts[loop->ContCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; + break; + + case RC_OPCODE_ENDLOOP: + { + loop = &s->Loops[s->CurrentLoopDepth - 1]; + /* Emit ENDLOOP */ + s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_JUMP_ANY + | R500_FC_IGNORE_UNCOVERED + ; + /* The constant integer at index 0 is used by all loops. 
*/ + s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) + ; + + /* Set jump address and int constant for BGNLOOP */ + s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(newip) + ; + + /* Set jump address for the BRK instructions. */ + while(loop->BrkCount--) { + s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = + R500_FC_JUMP_ADDR(newip + 1); + } + + /* Set jump address for CONT instructions. */ + while(loop->ContCount--) { + s->Code->inst[loop->Conts[loop->ContCount]].inst3 = + R500_FC_JUMP_ADDR(newip); + } + s->CurrentLoopDepth--; + break; + } + case RC_OPCODE_IF: + if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { + rc_error(s->C, "Branch depth exceeds hardware limit"); + return; + } + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); + + branch = &s->Branches[s->CurrentBranchDepth++]; + branch->If = newip; + branch->Else = -1; + branch->Endif = -1; + + if (s->CurrentBranchDepth > s->MaxBranchDepth) + s->MaxBranchDepth = s->CurrentBranchDepth; + + /* actual instruction is filled in at ENDIF time */ + break; + + case RC_OPCODE_ELSE: + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Else = newip; + + /* actual instruction is filled in at ENDIF time */ + break; + + case RC_OPCODE_ENDIF: + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Endif = newip; + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) + ; + 
s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ + | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + | R500_FC_IGNORE_UNCOVERED + ; + + if (branch->Else >= 0) { + /* increment branch counter also if jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); + + s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_B_ELSE /* all active pixels want to jump */ + | R500_FC_B_OP0_NONE /* no counter op if stay */ + | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } else { + /* don't touch branch counter on jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } + + + s->CurrentBranchDepth--; + break; + default: + rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); + } +} + +void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct emit_state s; + struct r500_fragment_program_code *code = &compiler->code->code.r500; + + memset(&s, 0, sizeof(s)); + s.C = &compiler->Base; + s.Code = code; + + memset(code, 0, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_end = -1; + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if 
(opcode->IsFlowControl) { + emit_flowcontrol(&s, inst); + } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + continue; + } else { + emit_tex(compiler, &inst->U.I); + } + } else { + emit_paired(compiler, &inst->U.P); + } + } + + if (code->max_temp_idx >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used"); + + if (compiler->Base.Error) + return; + + if (code->inst_end == -1 || + (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + int ip; + + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= compiler->Base.max_alu_insts-1) { + rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); + return; + } + + ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } + + /* Enable full flow control mode if we are using loops or have if + * statements nested at least four deep. */ + if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { + if (code->max_temp_idx < 1) + code->max_temp_idx = 1; + + code->us_fc_ctrl |= R500_FC_FULL_FC_EN; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_code.c b/src/gallium/drivers/r300/compiler/radeon_code.c new file mode 100644 index 00000000000..6842fb873bc --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_code.c @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_code.h" + +#include +#include +#include + +#include "radeon_program.h" + +void rc_constants_init(struct rc_constant_list * c) +{ + memset(c, 0, sizeof(*c)); +} + +/** + * Copy a constants structure, assuming that the destination structure + * is not initialized. 
+ */ +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) +{ + dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); + memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); + dst->Count = src->Count; + dst->_Reserved = src->Count; +} + +void rc_constants_destroy(struct rc_constant_list * c) +{ + free(c->Constants); + memset(c, 0, sizeof(*c)); +} + +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) +{ + unsigned index = c->Count; + + if (c->Count >= c->_Reserved) { + struct rc_constant * newlist; + + c->_Reserved = c->_Reserved * 2; + if (!c->_Reserved) + c->_Reserved = 16; + + newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); + memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); + + free(c->Constants); + c->Constants = newlist; + } + + c->Constants[index] = *constant; + c->Count++; + + return index; +} + + +/** + * Add a state vector to the constant list, while trying to avoid duplicates. + */ +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_STATE) { + if (c->Constants[index].u.State[0] == state0 && + c->Constants[index].u.State[1] == state1) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_STATE; + constant.Size = 4; + constant.u.State[0] = state0; + constant.u.State[1] = state1; + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate vector to the constant list, while trying to avoid + * duplicates. 
+ */ +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + memcpy(constant.u.Immediate, data, sizeof(float) * 4); + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate scalar to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) +{ + unsigned index; + int free_index = -1; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + unsigned comp; + for(comp = 0; comp < c->Constants[index].Size; ++comp) { + if (c->Constants[index].u.Immediate[comp] == data) { + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return index; + } + } + + if (c->Constants[index].Size < 4) + free_index = index; + } + } + + if (free_index >= 0) { + unsigned comp = c->Constants[free_index].Size++; + c->Constants[free_index].u.Immediate[comp] = data; + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return free_index; + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 1; + constant.u.Immediate[0] = data; + *swizzle = RC_SWIZZLE_XXXX; + + return rc_constants_add(c, &constant); +} + +void rc_constants_print(struct rc_constant_list * c) +{ + unsigned int i; + for(i = 0; i < c->Count; i++) { + if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) { + float * values = c->Constants[i].u.Immediate; + fprintf(stderr, "CONST[%u] = " + "{ %10.4f %10.4f %10.4f %10.4f }\n", + i, values[0],values[1], values[2], values[3]); + } + } +} diff --git 
a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h new file mode 100644 index 00000000000..67e6acf8b10 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_code.h @@ -0,0 +1,306 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef RADEON_CODE_H +#define RADEON_CODE_H + +#include + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R400_PFS_MAX_ALU_INST 512 +#define R400_PFS_MAX_TEX_INST 512 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 +#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 +#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) + +enum { + /** + * External constants are constants whose meaning is unknown to this + * compiler. For example, a Mesa gl_program's constants are turned + * into external constants. + */ + RC_CONSTANT_EXTERNAL = 0, + + RC_CONSTANT_IMMEDIATE, + + /** + * Constant referring to state that is known by this compiler, + * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. + */ + RC_CONSTANT_STATE +}; + +enum { + RC_STATE_SHADOW_AMBIENT = 0, + + RC_STATE_R300_WINDOW_DIMENSION, + RC_STATE_R300_TEXRECT_FACTOR, + RC_STATE_R300_TEXSCALE_FACTOR, + RC_STATE_R300_VIEWPORT_SCALE, + RC_STATE_R300_VIEWPORT_OFFSET +}; + +struct rc_constant { + unsigned Type:2; /**< RC_CONSTANT_xxx */ + unsigned Size:3; + + union { + unsigned External; + float Immediate[4]; + unsigned State[2]; + } u; +}; + +struct rc_constant_list { + struct rc_constant * Constants; + unsigned Count; + + unsigned _Reserved; +}; + +void rc_constants_init(struct rc_constant_list * c); +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); +void rc_constants_destroy(struct rc_constant_list * c); +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); +unsigned 
rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); +void rc_constants_print(struct rc_constant_list * c); + +/** + * Compare functions. + * + * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you + * the correct GL compare function. + */ +typedef enum { + RC_COMPARE_FUNC_NEVER = 0, + RC_COMPARE_FUNC_LESS, + RC_COMPARE_FUNC_EQUAL, + RC_COMPARE_FUNC_LEQUAL, + RC_COMPARE_FUNC_GREATER, + RC_COMPARE_FUNC_NOTEQUAL, + RC_COMPARE_FUNC_GEQUAL, + RC_COMPARE_FUNC_ALWAYS +} rc_compare_func; + +/** + * Coordinate wrapping modes. + * + * These are not quite the same as their GL counterparts yet. + */ +typedef enum { + RC_WRAP_NONE = 0, + RC_WRAP_REPEAT, + RC_WRAP_MIRRORED_REPEAT, + RC_WRAP_MIRRORED_CLAMP +} rc_wrap_mode; + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * This field contains swizzle for some lowering passes + * (shadow comparison, unorm->snorm conversion) + */ + unsigned texture_swizzle:12; + + /** + * If the sampler is used as a shadow sampler, + * this field specifies the compare function. + * + * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). + * \sa rc_compare_func + */ + unsigned texture_compare_func : 3; + + /** + * No matter what the sampler type is, + * this field turns it into a shadow sampler. + */ + unsigned compare_mode_enabled : 1; + + /** + * If the sampler will receive non-normalized coords, + * this field is set. The scaling factor is given by + * RC_STATE_R300_TEXRECT_FACTOR. + */ + unsigned non_normalized_coords : 1; + + /** + * This field specifies wrapping modes for the sampler. + * + * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths + * will be performed on the coordinates. + */ + unsigned wrap_mode : 3; + + /** + * The coords are scaled after applying the wrap mode emulation + * and right before texture fetch. 
The scaling factor is given by + * RC_STATE_R300_TEXSCALE_FACTOR. */ + unsigned clamp_and_scale_before_fetch : 1; + + /** + * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM + * in the shader. + */ + unsigned convert_unorm_to_snorm:1; + } unit[16]; + + unsigned frag_clamp:1; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + unsigned int length; /**< total # of texture instructions used */ + uint32_t inst[R400_PFS_MAX_TEX_INST]; + } tex; + + struct { + unsigned int length; /**< total # of ALU instructions used */ + struct { + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; + uint32_t r400_ext_addr; + } inst[R400_PFS_MAX_ALU_INST]; + } alu; + + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t r400_code_offset_ext; /* US_CODE_EXT */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ + /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries + * for r400 cards */ + unsigned int r390_mode:1; +}; + + +struct r500_fragment_program_code { + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ + + int max_temp_idx; + + uint32_t us_fc_ctrl; + + uint32_t int_constants[32]; + uint32_t int_constant_count; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + unsigned writes_depth:1; + + struct 
rc_constant_list constants; + unsigned *constants_remap_table; +}; + + +#define R300_VS_MAX_ALU 256 +#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4) +#define R500_VS_MAX_ALU 1024 +#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) +#define R300_VS_MAX_TEMPS 32 +/* This is the max for all chipsets (r300-r500) */ +#define R300_VS_MAX_FC_OPS 16 +/* The r500 maximum depth is not just for loops, but any combination of loops + * and subroutine jumps. */ +#define R500_VS_MAX_FC_DEPTH 8 +#define R300_VS_MAX_LOOP_DEPTH 1 + +#define VSF_MAX_INPUTS 32 +#define VSF_MAX_OUTPUTS 32 + +struct r300_vertex_program_code { + int length; + union { + uint32_t d[R500_VS_MAX_ALU_DWORDS]; + float f[R500_VS_MAX_ALU_DWORDS]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VSF_MAX_INPUTS]; + int outputs[VSF_MAX_OUTPUTS]; + + struct rc_constant_list constants; + unsigned *constants_remap_table; + + uint32_t InputsRead; + uint32_t OutputsWritten; + + unsigned int num_fc_ops; + uint32_t fc_ops; + union { + uint32_t r300[R300_VS_MAX_FC_OPS]; + struct { + uint32_t lw; + uint32_t uw; + } r500[R300_VS_MAX_FC_OPS]; + } fc_op_addrs; + int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; +}; + +#endif /* RADEON_CODE_H */ + diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c new file mode 100644 index 00000000000..b7936725d85 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c @@ -0,0 +1,489 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to 
the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include +#include +#include + +#include "radeon_dataflow.h" +#include "radeon_program.h" +#include "radeon_program_pair.h" +#include "radeon_compiler_util.h" + + +void rc_init(struct radeon_compiler * c) +{ + memset(c, 0, sizeof(*c)); + + memory_pool_init(&c->Pool); + c->Program.Instructions.Prev = &c->Program.Instructions; + c->Program.Instructions.Next = &c->Program.Instructions; + c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; +} + +void rc_destroy(struct radeon_compiler * c) +{ + rc_constants_destroy(&c->Program.Constants); + memory_pool_destroy(&c->Pool); + free(c->ErrorMsg); +} + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + if (!(c->Debug & RC_DBG_LOG)) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void rc_error(struct radeon_compiler * c, const char * fmt, ...) 
+{ + va_list ap; + + c->Error = 1; + + if (!c->ErrorMsg) { + /* Only remember the first error */ + char buf[1024]; + int written; + + va_start(ap, fmt); + written = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (written < sizeof(buf)) { + c->ErrorMsg = strdup(buf); + } else { + c->ErrorMsg = malloc(written + 1); + + va_start(ap, fmt); + vsnprintf(c->ErrorMsg, written + 1, fmt, ap); + va_end(ap); + } + } + + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "r300compiler error: "); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } +} + +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) +{ + rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); + return 1; +} + +/** + * Recompute c->Program.InputsRead and c->Program.OutputsWritten + * based on which inputs and outputs are actually referenced + * in program instructions. + */ +void rc_calculate_inputs_outputs(struct radeon_compiler * c) +{ + struct rc_instruction *inst; + + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) + { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int i; + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) + c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; + } + } +} + +/** + * Rewrite the program such that everything that source the given input + * register will source new_input instead. 
+ */ +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) +{ + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << input); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { + inst->U.I.SrcReg[i].File = new_input.File; + inst->U.I.SrcReg[i].Index = new_input.Index; + inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); + if (!inst->U.I.SrcReg[i].Abs) { + inst->U.I.SrcReg[i].Negate ^= new_input.Negate; + inst->U.I.SrcReg[i].Abs = new_input.Abs; + } + + c->Program.InputsRead |= 1 << new_input.Index; + } + } + } +} + + +/** + * Rewrite the program such that everything that writes into the given + * output register will instead write to new_output. The new_output + * writemask is honoured. + */ +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) +{ + struct rc_instruction * inst; + + c->Program.OutputsWritten &= ~(1 << output); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.Index = new_output; + inst->U.I.DstReg.WriteMask &= writemask; + + c->Program.OutputsWritten |= 1 << new_output; + } + } + } +} + + +/** + * Rewrite the program such that a given output is duplicated. 
+ */ +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) +{ + unsigned tempreg = rc_find_free_temporary(c); + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tempreg; + } + } + } + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = output; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = dup_output; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + c->Program.OutputsWritten |= 1 << dup_output; +} + + +/** + * Introduce standard code fragment to deal with fragment.position. 
+ */ +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction * inst_rcp; + struct rc_instruction * inst_mul; + struct rc_instruction * inst_mad; + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << wpos); + c->Program.InputsRead |= 1 << new_input; + + /* perspective divide */ + inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tempregi; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + + inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_rcp->U.I.SrcReg[0].Index = new_input; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tempregi; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + + inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_mul->U.I.SrcReg[0].Index = new_input; + + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tempregi; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + + /* viewport transformation */ + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = tempregi; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = tempregi; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + + inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; + + if (full_vtransform) { + inst_mad->U.I.SrcReg[1].Index = 
rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); + } else { + inst_mad->U.I.SrcReg[1].Index = + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + } + + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == wpos) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} + + +/** + * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. + * Gallium and OpenGL define it the other way around. + * + * So let's just negate FACE at the beginning of the shader and rewrite the rest + * of the shader to read from the newly allocated temporary. 
+ */ +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction *inst_add; + struct rc_instruction *inst; + + /* flip FACE: tempregi.x = 1 - FACE.x (ADD of constant one and the negated input) */ + inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tempregi; + inst_add->U.I.DstReg.WriteMask = RC_MASK_X; + + inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + + inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; + inst_add->U.I.SrcReg[1].Index = face; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == face) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} + +static void reg_count_callback(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + int *max_reg = userdata; + if (file == RC_FILE_TEMPORARY) + (int)index > *max_reg ? 
*max_reg = index : 0; +} + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) +{ + int max_reg = -1; + struct rc_instruction * tmp; + memset(s, 0, sizeof(*s)); + + for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; + tmp = tmp->Next){ + const struct rc_opcode_info * info; + rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(tmp->U.I.Opcode); + if (info->Opcode == RC_OPCODE_BEGIN_TEX) + continue; + if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) + s->num_presub_ops++; + } else { + if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) + s->num_presub_ops++; + if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + s->num_presub_ops++; + /* Assuming alpha will never be a flow control or + * a tex instruction. */ + if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) + s->num_alpha_insts++; + if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) + s->num_rgb_insts++; + info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); + } + if (info->IsFlowControl) + s->num_fc_insts++; + if (info->HasTexture) + s->num_tex_insts++; + s->num_insts++; + } + s->num_temp_regs = max_reg + 1; +} + +static void print_stats(struct radeon_compiler * c) +{ + struct rc_program_stats s; + + if (c->initial_num_insts <= 5) + return; + + rc_get_stats(c, &s); + + switch (c->type) { + case RC_VERTEX_PROGRAM: + fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Flow Control Instructions\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_fc_insts, s.num_temp_regs); + break; + + case RC_FRAGMENT_PROGRAM: + fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Vector Instructions (RGB)\n" + "~%4u Scalar Instructions (Alpha)\n" + "~%4u Flow Control Instructions\n" + "~%4u Texture Instructions\n" + "~%4u Presub Operations\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, 
s.num_rgb_insts, s.num_alpha_insts, + s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, + s.num_temp_regs); + break; + default: + assert(0); + } +} + +static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { + "Vertex Program", + "Fragment Program" +}; + +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + for (unsigned i = 0; list[i].name; i++) { + if (list[i].predicate) { + list[i].run(c, list[i].user); + + if (c->Error) + return; + + if ((c->Debug & RC_DBG_LOG) && list[i].dump) { + fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); + rc_print_program(&c->Program); + } + } + } +} + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + struct rc_program_stats s; + + rc_get_stats(c, &s); + c->initial_num_insts = s.num_insts; + + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); + rc_print_program(&c->Program); + } + + rc_run_compiler_passes(c, list); + + if (c->Debug & RC_DBG_STATS) + print_stats(c); +} + +void rc_validate_final_shader(struct radeon_compiler *c, void *user) +{ + /* Check the number of constants. */ + if (c->Program.Constants.Count > c->max_constants) { + rc_error(c, "Too many constants. 
Max: %i, Got: %i\n", + c->max_constants, c->Program.Constants.Count); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h new file mode 100644 index 00000000000..74594af23c2 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h @@ -0,0 +1,171 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "main/compiler.h" + +#include "memory_pool.h" +#include "radeon_code.h" +#include "radeon_program.h" +#include "radeon_emulate_loops.h" + +#define RC_DBG_LOG (1 << 0) +#define RC_DBG_STATS (1 << 1) + +struct rc_swizzle_caps; + +enum rc_program_type { + RC_VERTEX_PROGRAM, + RC_FRAGMENT_PROGRAM, + RC_NUM_PROGRAM_TYPES +}; + +struct radeon_compiler { + struct memory_pool Pool; + struct rc_program Program; + enum rc_program_type type; + unsigned Debug:2; + unsigned Error:1; + char * ErrorMsg; + + /* Hardware specification. */ + unsigned is_r400:1; + unsigned is_r500:1; + unsigned has_half_swizzles:1; + unsigned has_presub:1; + unsigned disable_optimizations:1; + unsigned max_temp_regs; + unsigned max_constants; + int max_alu_insts; + unsigned max_tex_insts; + + /* Whether to remove unused constants and empty holes in constant space. */ + unsigned remove_unused_constants:1; + + /** + * Variables used internally, not be touched by callers + * of the compiler + */ + /*@{*/ + struct rc_swizzle_caps * SwizzleCaps; + /*@}*/ + + struct emulate_loop_state loop_state; + + unsigned initial_num_insts; /* Number of instructions at start. */ +}; + +void rc_init(struct radeon_compiler * c); +void rc_destroy(struct radeon_compiler * c); + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...); +void rc_error(struct radeon_compiler * c, const char * fmt, ...); + +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion); + +/** + * This macro acts like an if-statement that can be used to implement + * non-aborting assertions in the compiler. + * + * It checks whether \p cond is true. If not, an internal compiler error is + * flagged and the if-clause is run. 
+ * + * A typical use-case would be: + * + * if (rc_assert(c, condition-that-must-be-true)) + * return; + */ +#define rc_assert(c, cond) \ + (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) + +void rc_calculate_inputs_outputs(struct radeon_compiler * c); + +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform); +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face); + +struct r300_fragment_program_compiler { + struct radeon_compiler Base; + struct rX00_fragment_program_code *code; + /* Optional transformations and features. */ + struct r300_fragment_program_external_state state; + unsigned enable_shadow_ambient; + /* Register corresponding to the depthbuffer. */ + unsigned OutputDepth; + /* Registers corresponding to the four colorbuffers. */ + unsigned OutputColor[4]; + + void * UserData; + void (*AllocateHwInputs)( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata); +}; + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + uint32_t RequiredOutputs; + + void * UserData; + void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); + + int PredicateIndex; + unsigned int PredicateMask; +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); +void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user); + +struct radeon_compiler_pass { + const char *name; /* Name of the pass. 
*/ + int dump; /* Dump the program if Debug == 1? */ + int predicate; /* Run this pass? */ + void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */ + void *user; /* Optional parameter which is passed to the run function. */ +}; + +struct rc_program_stats { + unsigned num_insts; + unsigned num_fc_insts; + unsigned num_tex_insts; + unsigned num_rgb_insts; + unsigned num_alpha_insts; + unsigned num_presub_ops; + unsigned num_temp_regs; +}; + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_validate_final_shader(struct radeon_compiler *c, void *user); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c new file mode 100644 index 00000000000..2742721f800 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c @@ -0,0 +1,701 @@ +/* + * Copyright 2010 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_compiler_util.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" +/** + */ +unsigned int rc_swizzle_to_writemask(unsigned int swz) +{ + unsigned int mask = 0; + unsigned int i; + + for(i = 0; i < 4; i++) { + mask |= 1 << GET_SWZ(swz, i); + } + mask &= RC_MASK_XYZW; + + return mask; +} + +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +/** + * The purpose of this function is to standardize the number channels used by + * swizzles. All swizzles regardless of what instruction they are a part of + * should have 4 channels initialized with values. + * @param channels The number of channels in initial_value that have a + * meaningful value. + * @return An initialized swizzle that has all of the unused channels set to + * RC_SWIZZLE_UNUSED. 
+ */ +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) +{ + unsigned int i; + for (i = channels; i < 4; i++) { + SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); + } + return initial_value; +} + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) +{ + unsigned int ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +unsigned int combine_swizzles(unsigned int src, unsigned int swz) +{ + unsigned int ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; + + return ret; +} + +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + } + return RC_SWIZZLE_UNUSED; +} + +/* Reorder mask bits according to swizzle. 
*/ +unsigned swizzle_mask(unsigned swizzle, unsigned mask) +{ + unsigned ret = 0; + for (unsigned chan = 0; chan < 4; ++chan) { + unsigned swz = GET_SWZ(swizzle, chan); + if (swz < 4) + ret |= GET_BIT(mask, swz) << chan; + } + return ret; +} + +static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) +{ + if (info->HasTexture) { + return 0; + } + switch (info->Opcode) { + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +/** + * @return A swizzle the results from converting old_swizzle using + * conversion_swizzle + */ +unsigned int rc_adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle) +{ + unsigned int i; + unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + for (i = 0; i < 4; i++) { + unsigned int new_chan = get_swz(conversion_swizzle, i); + if (new_chan == RC_SWIZZLE_UNUSED) { + continue; + } + SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); + } + return new_swizzle; +} + +static unsigned int rewrite_writemask( + unsigned int old_mask, + unsigned int conversion_swizzle) +{ + unsigned int new_mask = 0; + unsigned int i; + + for (i = 0; i < 4; i++) { + if (!GET_BIT(old_mask, i) + || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { + continue; + } + new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); + } + + return new_mask; +} + +/** + * This function rewrites the writemask of sub and adjusts the swizzles + * of all its source registers based on the conversion_swizzle. + * conversion_swizzle represents a mapping of the old writemask to the + * new writemask. For a detailed description of how conversion swizzles + * work see rc_rewrite_swizzle(). 
+ */ +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + unsigned int i; + + sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); + + if (!srcs_need_rewrite(info)) { + return ; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + sub->Arg[i].Swizzle = + rc_adjust_channels(sub->Arg[i].Swizzle, + conversion_swizzle); + } +} + +/** + * Callback handed to rc_for_all_reads_src(): remaps src->Swizzle through the + * conversion swizzle that userdata points to. + */ +static void normal_rewrite_writemask_cb( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + unsigned int * conversion_swizzle = (unsigned int *)userdata; + src->Swizzle = rc_adjust_channels(src->Swizzle, *conversion_swizzle); +} + +/** + * This function is the same as rc_pair_rewrite_writemask() except it + * operates on normal instructions. + */ +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle) +{ + struct rc_sub_instruction * sub = &inst->U.I; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + sub->DstReg.WriteMask = + rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); + + if (info->HasTexture) { + unsigned int i; + assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); + for (i = 0; i < 4; i++) { + unsigned int swz = GET_SWZ(conversion_swizzle, i); + if (swz > 3) + continue; + SET_SWZ(sub->TexSwizzle, swz, i); + } + } + + if (!srcs_need_rewrite(info)) { + return; + } + + /* rc_adjust_channels() expects a conversion swizzle, not the rewritten + * writemask, so the callback is given conversion_swizzle itself. */ + rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, + &conversion_swizzle); +} + +/** + * This function replaces each value 'swz' in swizzle with the value of + * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's + * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want + * to change all the Y's in swizzle to X, then conversion_swizzle should be + * _X__ (0xfc7). 
If you want to change the Y's to X and the X's to Y, then + * conversion swizzle should be YX__ (0xfc1). + * @param swizzle The swizzle to change + * @param conversion_swizzle Describes the conversion to perform on the swizzle + * @return A converted swizzle + */ +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int conversion_swizzle) +{ + unsigned int chan; + unsigned int out_swizzle = swizzle; + + for (chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(swizzle, chan); + unsigned int new_swz; + if (swz > 3) { + SET_SWZ(out_swizzle, chan, swz); + } else { + new_swz = GET_SWZ(conversion_swizzle, swz); + if (new_swz != RC_SWIZZLE_UNUSED) { + SET_SWZ(out_swizzle, chan, new_swz); + } else { + SET_SWZ(out_swizzle, chan, swz); + } + } + } + return out_swizzle; +} + +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + +void reset_srcreg(struct rc_src_register* reg) +{ + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; +} + +unsigned int rc_src_reads_dst_mask( + rc_register_file src_file, + unsigned int src_idx, + unsigned int src_swz, + rc_register_file dst_file, + unsigned int dst_idx, + unsigned int dst_mask) +{ + if (src_file != dst_file || src_idx != dst_idx) { + return RC_MASK_NONE; + } + return dst_mask & rc_swizzle_to_writemask(src_swz); +} + +/** + * @return A bit mask specifying whether this swizzle will select from an RGB + * source, an Alpha source, or both. 
+ */ +unsigned int rc_source_type_swz(unsigned int swizzle) +{ + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + unsigned int ret = RC_SOURCE_NONE; + + for(chan = 0; chan < 4; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + ret |= RC_SOURCE_ALPHA; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z) { + ret |= RC_SOURCE_RGB; + } + } + return ret; +} + +unsigned int rc_source_type_mask(unsigned int mask) +{ + unsigned int ret = RC_SOURCE_NONE; + + if (mask & RC_MASK_XYZ) + ret |= RC_SOURCE_RGB; + + if (mask & RC_MASK_W) + ret |= RC_SOURCE_ALPHA; + + return ret; +} + +struct src_select { + rc_register_file File; + int Index; + unsigned int SrcType; +}; + +struct can_use_presub_data { + struct src_select Selects[5]; + unsigned int SelectCount; + const struct rc_src_register * ReplaceReg; + unsigned int ReplaceRemoved; +}; + +static void can_use_presub_data_add_select( + struct can_use_presub_data * data, + rc_register_file file, + unsigned int index, + unsigned int src_type) +{ + struct src_select * select; + + select = &data->Selects[data->SelectCount++]; + select->File = file; + select->Index = index; + select->SrcType = src_type; +} + +/** + * This callback function counts the number of sources in inst that are + * different from the sources in can_use_presub_data->RemoveSrcs. 
+ */ +static void can_use_presub_read_cb( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct can_use_presub_data * d = userdata; + + if (!d->ReplaceRemoved && src == d->ReplaceReg) { + d->ReplaceRemoved = 1; + return; + } + + if (src->File == RC_FILE_NONE) + return; + + can_use_presub_data_add_select(d, src->File, src->Index, + rc_source_type_swz(src->Swizzle)); +} + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + const struct rc_src_register * replace_reg, + const struct rc_src_register * presub_src0, + const struct rc_src_register * presub_src1) +{ + struct can_use_presub_data d; + unsigned int num_presub_srcs; + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + int rgb_count = 0, alpha_count = 0; + unsigned int src_type0, src_type1; + + if (presub_op == RC_PRESUB_NONE) { + return 1; + } + + if (info->HasTexture) { + return 0; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. 
+ * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + return 0; + } + + memset(&d, 0, sizeof(d)); + d.ReplaceReg = replace_reg; + + rc_for_all_reads_src(inst, can_use_presub_read_cb, &d); + + num_presub_srcs = rc_presubtract_src_reg_count(presub_op); + + src_type0 = rc_source_type_swz(presub_src0->Swizzle); + can_use_presub_data_add_select(&d, + presub_src0->File, + presub_src0->Index, + src_type0); + + if (num_presub_srcs > 1) { + src_type1 = rc_source_type_swz(presub_src1->Swizzle); + can_use_presub_data_add_select(&d, + presub_src1->File, + presub_src1->Index, + src_type1); + + /* Even if both of the presub sources read from the same + * register, we still need to use 2 different source selects + * for them, so we need to increment the count to compensate. + */ + if (presub_src0->File == presub_src1->File + && presub_src0->Index == presub_src1->Index) { + if (src_type0 & src_type1 & RC_SOURCE_RGB) { + rgb_count++; + } + if (src_type0 & src_type1 & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + } + + /* Count the number of source selects for Alpha and RGB. If we + * encounter two of the same source selects then we can ignore the + * first one. 
*/ + for (i = 0; i < d.SelectCount; i++) { + unsigned int j; + unsigned int src_type = d.Selects[i].SrcType; + for (j = i + 1; j < d.SelectCount; j++) { + if (d.Selects[i].File == d.Selects[j].File + && d.Selects[i].Index == d.Selects[j].Index) { + src_type &= ~d.Selects[j].SrcType; + } + } + if (src_type & RC_SOURCE_RGB) { + rgb_count++; + } + + if (src_type & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + + if (rgb_count > 3 || alpha_count > 3) { + return 0; + } + + return 1; +} + +struct max_data { + unsigned int Max; + unsigned int HasFileType; + rc_register_file File; +}; + +static void max_callback( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct max_data * d = (struct max_data*)userdata; + if (file == d->File && (!d->HasFileType || index > d->Max)) { + d->Max = index; + d->HasFileType = 1; + } +} + +/** + * @return The maximum index of the specified register file used by the + * program. + */ +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file) +{ + struct max_data data; + struct rc_instruction * inst; + data.Max = 0; + data.HasFileType = 0; + data.File = file; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, max_callback, &data); + rc_for_all_writes_mask(inst, max_callback, &data); + } + if (!data.HasFileType) { + return -1; + } else { + return data.Max; + } +} + +static unsigned int get_source_readmask( + struct rc_pair_sub_instruction * sub, + unsigned int source, + unsigned int src_type) +{ + unsigned int i; + unsigned int readmask = 0; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + + for (i = 0; i < info->NumSrcRegs; i++) { + if (sub->Arg[i].Source != source + || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) { + continue; + } + readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle); + } + return readmask; +} + +/** + * This 
function attempts to remove a source from a pair instructions. + * @param inst + * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd + * @param source The index of the source to remove + * @param new_readmask A mask representing the components that are read by + * the source that is intended to replace the one you are removing. If you + * want to remove a source only and not replace it, this parameter should be + * zero. + * @return 1 if the source was successfully removed, 0 if it was not + */ +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask) +{ + unsigned int readmask = 0; + + readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type); + readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type); + + if ((new_readmask & readmask) != readmask) + return 0; + + if (src_type & RC_SOURCE_RGB) { + memset(&inst->U.P.RGB.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + if (src_type & RC_SOURCE_ALPHA) { + memset(&inst->U.P.Alpha.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + return 1; +} + +/** + * @return RC_OPCODE_NOP if inst is not a flow control instruction. + * @return The opcode of inst if it is a flow control instruction. + */ +rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst) +{ + const struct rc_opcode_info * info; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(inst->U.I.Opcode); + } else { + info = rc_get_opcode_info(inst->U.P.RGB.Opcode); + /*A flow control instruction shouldn't have an alpha + * instruction.*/ + assert(!info->IsFlowControl || + inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); + } + + if (info->IsFlowControl) + return info->Opcode; + else + return RC_OPCODE_NOP; + +} + +/** + * @return The BGNLOOP instruction that starts the loop ended by endloop. 
+ */ +struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop) +{ + unsigned int endloop_count = 0; + struct rc_instruction * inst; + for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + endloop_count++; + } else if (op == RC_OPCODE_BGNLOOP) { + if (endloop_count == 0) { + return inst; + } else { + endloop_count--; + } + } + } + return NULL; +} + +/** + * @return The ENDLOOP instruction that ends the loop started by bgnloop. + */ +struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop) +{ + unsigned int bgnloop_count = 0; + struct rc_instruction * inst; + for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + bgnloop_count++; + } else if (op == RC_OPCODE_ENDLOOP) { + if (bgnloop_count == 0) { + return inst; + } else { + bgnloop_count--; + } + } + } + return NULL; +} + +/** + * @return A conversion swizzle for converting from old_mask->new_mask + */ +unsigned int rc_make_conversion_swizzle( + unsigned int old_mask, + unsigned int new_mask) +{ + unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + unsigned int old_idx; + unsigned int new_idx = 0; + for (old_idx = 0; old_idx < 4; old_idx++) { + if (!GET_BIT(old_mask, old_idx)) + continue; + for ( ; new_idx < 4; new_idx++) { + if (GET_BIT(new_mask, new_idx)) { + SET_SWZ(conversion_swizzle, old_idx, new_idx); + new_idx++; + break; + } + } + } + return conversion_swizzle; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h new file mode 100644 index 00000000000..3730aa888c0 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h @@ -0,0 +1,89 @@ +#include "radeon_program_constants.h" + +#ifndef RADEON_PROGRAM_UTIL_H +#define RADEON_PROGRAM_UTIL_H + +#include "radeon_opcodes.h" + 
+struct radeon_compiler; +struct rc_instruction; +struct rc_pair_instruction; +struct rc_pair_sub_instruction; +struct rc_src_register; + +unsigned int rc_swizzle_to_writemask(unsigned int swz); + +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); + +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels); + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, + rc_swizzle swz_z, rc_swizzle swz_w); + +unsigned int combine_swizzles(unsigned int src, unsigned int swz); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); + +unsigned swizzle_mask(unsigned swizzle, unsigned mask); + +unsigned int rc_adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle); + +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle); + +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle); + +/* The second argument is a conversion swizzle (not a writemask); the name + * matches the definition in radeon_compiler_util.c. */ +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int conversion_swizzle); + +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +void reset_srcreg(struct rc_src_register* reg); + +unsigned int rc_src_reads_dst_mask( + rc_register_file src_file, + unsigned int src_idx, + unsigned int src_swz, + rc_register_file dst_file, + unsigned int dst_idx, + unsigned int dst_mask); + +unsigned int rc_source_type_swz(unsigned int swizzle); + +unsigned int rc_source_type_mask(unsigned int mask); + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + const struct rc_src_register * replace_reg, + const struct rc_src_register * presub_src0, + const struct rc_src_register * presub_src1); + +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file); + +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int 
new_readmask); + +rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst); + +struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop); +struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop); + +unsigned int rc_make_conversion_swizzle( + unsigned int old_mask, + unsigned int new_mask); + +#endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.c b/src/gallium/drivers/r300/compiler/radeon_dataflow.c new file mode 100644 index 00000000000..a8decacedaf --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.c @@ -0,0 +1,892 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_program.h" + +struct read_write_mask_data { + void * UserData; + rc_read_write_mask_fn Cb; +}; + +static void reads_normal_callback( + void * userdata, + struct rc_instruction * fullinst, + struct rc_src_register * src) +{ + struct read_write_mask_data * cb_data = userdata; + unsigned int refmask = 0; + unsigned int chan; + for(chan = 0; chan < 4; chan++) { + refmask |= 1 << GET_SWZ(src->Swizzle, chan); + } + refmask &= RC_MASK_XYZW; + + if (refmask) { + cb_data->Cb(cb_data->UserData, fullinst, src->File, + src->Index, refmask); + } + + if (refmask && src->RelAddr) { + cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0, + RC_MASK_X); + } +} + +static void pair_get_src_refmasks(unsigned int * refmasks, + struct rc_pair_instruction * inst, + unsigned int swz, unsigned int src) +{ + if (swz >= 4) + return; + + if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { + if(src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = + rc_presubtract_src_reg_count( + inst->RGB.Src[src].Index); + for(i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } + else { + refmasks[src] |= 1 << swz; + } + } + + if (swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = rc_presubtract_src_reg_count( + inst->Alpha.Src[src].Index); + for(i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } + else { + refmasks[src] |= 1 << swz; + } + } +} + +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + unsigned int refmasks[3] = { 0, 0, 0 }; + + unsigned int arg; + + for(arg = 0; arg < 3; ++arg) { + unsigned int chan; + for(chan = 0; chan < 3; ++chan) { + unsigned int swz_rgb = + GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); + unsigned int swz_alpha = + 
GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan); + pair_get_src_refmasks(refmasks, inst, swz_rgb, + inst->RGB.Arg[arg].Source); + pair_get_src_refmasks(refmasks, inst, swz_alpha, + inst->Alpha.Arg[arg].Source); + } + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, + refmasks[src] & RC_MASK_XYZ); + + if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); + } +} + +static void pair_sub_for_all_args( + struct rc_instruction * fullinst, + struct rc_pair_sub_instruction * sub, + rc_pair_read_arg_fn cb, + void * userdata) +{ + int i; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + + for(i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type; + + src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + + if (src_type == RC_SOURCE_NONE) + continue; + + if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { + unsigned int presub_type; + unsigned int presub_src_count; + struct rc_pair_instruction_source * src_array; + unsigned int j; + + if (src_type & RC_SOURCE_RGB) { + presub_type = fullinst-> + U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.RGB.Src; + } else { + presub_type = fullinst-> + U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.Alpha.Src; + } + presub_src_count + = rc_presubtract_src_reg_count(presub_type); + for(j = 0; j < presub_src_count; j++) { + cb(userdata, fullinst, &sub->Arg[i], + &src_array[j]); + } + } else { + struct rc_pair_instruction_source * src = + rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); + if (src) { + cb(userdata, fullinst, &sub->Arg[i], src); + } + } + } +} + +/* This function calls the callback function (cb) for each source used by + * the instruction. 
+ * */ +void rc_for_all_reads_src( + struct rc_instruction * inst, + rc_read_src_fn cb, + void * userdata) +{ + const struct rc_opcode_info * opcode = + rc_get_opcode_info(inst->U.I.Opcode); + + /* This function only works with normal instructions. */ + if (inst->Type != RC_INSTRUCTION_NORMAL) { + assert(0); + return; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + + if (inst->U.I.SrcReg[src].File == RC_FILE_NONE) + continue; + + if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_regs = rc_presubtract_src_reg_count( + inst->U.I.PreSub.Opcode); + for( i = 0; i < srcp_regs; i++) { + cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]); + } + } else { + cb(userdata, inst, &inst->U.I.SrcReg[src]); + } + } +} + +/** + * This function calls the callback function (cb) for each arg of the RGB and + * alpha components. + */ +void rc_pair_for_all_reads_arg( + struct rc_instruction * inst, + rc_pair_read_arg_fn cb, + void * userdata) +{ + /* This function only works with pair instructions. */ + if (inst->Type != RC_INSTRUCTION_PAIR) { + assert(0); + return; + } + + pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata); + pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata); +} + +/** + * Calls a callback function for all register reads. + * + * This is conservative, i.e. if the same register is referenced multiple times, + * the callback may also be called multiple times. + * Also, the writemask of the instruction is not taken into account. 
+ */ +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + struct read_write_mask_data cb_data; + cb_data.UserData = userdata; + cb_data.Cb = cb; + + rc_for_all_reads_src(inst, reads_normal_callback, &cb_data); + } else { + reads_pair(inst, cb, userdata); + } +} + + + +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg && inst->DstReg.WriteMask) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); +} + +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); + + if (inst->Alpha.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); +} + +/** + * Calls a callback function for all register writes in the instruction, + * reporting writemasks to the callback function. + * + * \warning Does not report output registers for paired instructions! 
+ */ +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + writes_normal(inst, cb, userdata); + } else { + writes_pair(inst, cb, userdata); + } +} + + +struct mask_to_chan_data { + void * UserData; + rc_read_write_chan_fn Fn; +}; + +static void mask_to_chan_cb(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct mask_to_chan_data * d = data; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(mask, chan)) + d->Fn(d->UserData, inst, file, index, chan); + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! 
+ */ +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); +} + +static void remap_normal_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + unsigned int remapped_presub = 0; + + if (opcode->HasDstReg) { + rc_register_file file = inst->DstReg.File; + unsigned int index = inst->DstReg.Index; + + cb(userdata, fullinst, &file, &index); + + inst->DstReg.File = file; + inst->DstReg.Index = index; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + rc_register_file file = inst->SrcReg[src].File; + unsigned int index = inst->SrcReg[src].Index; + + if (file == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_srcs = rc_presubtract_src_reg_count( + inst->PreSub.Opcode); + /* Make sure we only remap presubtract sources once in + * case more than one source register reads the + * presubtract result. 
*/ + if (remapped_presub) + continue; + + for(i = 0; i < srcp_srcs; i++) { + file = inst->PreSub.SrcReg[i].File; + index = inst->PreSub.SrcReg[i].Index; + cb(userdata, fullinst, &file, &index); + inst->PreSub.SrcReg[i].File = file; + inst->PreSub.SrcReg[i].Index = index; + } + remapped_presub = 1; + } + else { + cb(userdata, fullinst, &file, &index); + + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } + } +} + +static void remap_pair_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->RGB.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.DestIndex = index; + } + + if (inst->Alpha.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->Alpha.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.DestIndex = index; + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + rc_register_file file = inst->RGB.Src[src].File; + unsigned int index = inst->RGB.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.Src[src].File = file; + inst->RGB.Src[src].Index = index; + } + + if (inst->Alpha.Src[src].Used) { + rc_register_file file = inst->Alpha.Src[src].File; + unsigned int index = inst->Alpha.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.Src[src].File = file; + inst->Alpha.Src[src].Index = index; + } + } +} + + +/** + * Remap all register accesses according to the given function. + * That is, call the function \p cb for each referenced register (both read and written) + * and update the given instruction \p inst accordingly + * if it modifies its \ref pfile and \ref pindex contents. 
+ */ +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) + remap_normal_instruction(inst, cb, userdata); + else + remap_pair_instruction(inst, cb, userdata); +} + +struct branch_write_mask { + unsigned int IfWriteMask:4; + unsigned int ElseWriteMask:4; + unsigned int HasElse:1; +}; + +union get_readers_read_cb { + rc_read_src_fn I; + rc_pair_read_arg_fn P; +}; + +struct get_readers_callback_data { + struct radeon_compiler * C; + struct rc_reader_data * ReaderData; + rc_read_src_fn ReadNormalCB; + rc_pair_read_arg_fn ReadPairCB; + rc_read_write_mask_fn WriteCB; + rc_register_file DstFile; + unsigned int DstIndex; + unsigned int DstMask; + unsigned int AliveWriteMask; + /* For convenience, this is indexed starting at 1 */ + struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; +}; + +static struct rc_reader * add_reader( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask) +{ + struct rc_reader * new; + memory_pool_array_reserve(pool, struct rc_reader, data->Readers, + data->ReaderCount, data->ReadersReserved, 1); + new = &data->Readers[data->ReaderCount++]; + new->Inst = inst; + new->WriteMask = mask; + return new; +} + +static void add_reader_normal( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_src_register * src) +{ + struct rc_reader * new = add_reader(pool, data, inst, mask); + new->U.I.Src = src; +} + + +static void add_reader_pair( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + struct rc_reader * new = add_reader(pool, data, inst, mask); + new->U.P.Src = src; + new->U.P.Arg = arg; +} + +static unsigned int get_readers_read_callback( + struct 
get_readers_callback_data * cb_data, + unsigned int has_rel_addr, + rc_register_file file, + unsigned int index, + unsigned int swizzle) +{ + unsigned int shared_mask, read_mask; + + if (has_rel_addr) { + cb_data->ReaderData->Abort = 1; + return RC_MASK_NONE; + } + + shared_mask = rc_src_reads_dst_mask(file, index, swizzle, + cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); + + if (shared_mask == RC_MASK_NONE) + return shared_mask; + + /* If we make it this far, it means that this source reads from the + * same register written to by d->ReaderData->Writer. */ + + read_mask = rc_swizzle_to_writemask(swizzle); + if (cb_data->ReaderData->AbortOnRead & read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + if (cb_data->ReaderData->LoopDepth > 0) { + cb_data->ReaderData->AbortOnWrite |= + (read_mask & cb_data->AliveWriteMask); + } + + /* XXX The behavior in this case should be configurable. */ + if ((read_mask & cb_data->AliveWriteMask) != read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + return shared_mask; +} + +static void get_readers_pair_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int shared_mask; + struct get_readers_callback_data * d = userdata; + + shared_mask = get_readers_read_callback(d, + 0 /*Pair Instructions don't use RelAddr*/, + src->File, src->Index, arg->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + + if (d->ReadPairCB) + d->ReadPairCB(d->ReaderData, inst, arg, src); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src); +} + +/** + * This function is used by rc_get_readers_normal() to determine whether inst + * is a reader of userdata->ReaderData->Writer + */ +static void get_readers_normal_read_callback( + void * userdata, + struct rc_instruction * inst, + struct 
rc_src_register * src) +{ + struct get_readers_callback_data * d = userdata; + unsigned int shared_mask; + + shared_mask = get_readers_read_callback(d, + src->RelAddr, src->File, src->Index, src->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + /* The callback function could potentially clear d->ReaderData->Abort, + * so we need to call it before we return. */ + if (d->ReadNormalCB) + d->ReadNormalCB(d->ReaderData, inst, src); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src); +} + +/** + * This function is used by rc_get_readers_normal() to determine when + * userdata->ReaderData->Writer is dead (i.e. all components of its + * destination register have been overwritten by other instructions). + */ +static void get_readers_write_callback( + void *userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct get_readers_callback_data * d = userdata; + + if (index == d->DstIndex && file == d->DstFile) { + unsigned int shared_mask = mask & d->DstMask; + d->ReaderData->AbortOnRead &= ~shared_mask; + d->AliveWriteMask &= ~shared_mask; + if (d->ReaderData->AbortOnWrite & shared_mask) { + d->ReaderData->Abort = 1; + } + } + + if(d->WriteCB) + d->WriteCB(d->ReaderData, inst, file, index, mask); +} + +static void push_branch_mask( + struct get_readers_callback_data * d, + unsigned int * branch_depth) +{ + (*branch_depth)++; + if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { + d->ReaderData->Abort = 1; + return; + } + d->BranchMasks[*branch_depth].IfWriteMask = + d->AliveWriteMask; +} + +static void pop_branch_mask( + struct get_readers_callback_data * d, + unsigned int * branch_depth) +{ + struct branch_write_mask * masks = &d->BranchMasks[*branch_depth]; + + if (masks->HasElse) { + /* Abort on read for components that were written in the IF + * block.
*/ + d->ReaderData->AbortOnRead |= + masks->IfWriteMask & ~masks->ElseWriteMask; + /* Abort on read for components that were written in the ELSE + * block. */ + d->ReaderData->AbortOnRead |= + masks->ElseWriteMask & ~d->AliveWriteMask; + + d->AliveWriteMask = masks->IfWriteMask + ^ ((masks->IfWriteMask ^ masks->ElseWriteMask) + & (masks->IfWriteMask ^ d->AliveWriteMask)); + } else { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask & ~d->AliveWriteMask; + d->AliveWriteMask = masks->IfWriteMask; + + } + memset(masks, 0, sizeof(struct branch_write_mask)); + (*branch_depth)--; +} + +static void get_readers_for_single_write( + void * userdata, + struct rc_instruction * writer, + rc_register_file dst_file, + unsigned int dst_index, + unsigned int dst_mask) +{ + struct rc_instruction * tmp; + unsigned int branch_depth = 0; + struct rc_instruction * endloop = NULL; + unsigned int abort_on_read_at_endloop = 0; + struct get_readers_callback_data * d = userdata; + + d->ReaderData->Writer = writer; + d->ReaderData->AbortOnRead = 0; + d->ReaderData->AbortOnWrite = 0; + d->ReaderData->LoopDepth = 0; + d->ReaderData->InElse = 0; + d->DstFile = dst_file; + d->DstIndex = dst_index; + d->DstMask = dst_mask; + d->AliveWriteMask = dst_mask; + memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); + + if (!dst_mask) + return; + + for(tmp = writer->Next; tmp != &d->C->Program.Instructions; + tmp = tmp->Next){ + rc_opcode opcode = rc_get_flow_control_inst(tmp); + switch(opcode) { + case RC_OPCODE_BGNLOOP: + d->ReaderData->LoopDepth++; + push_branch_mask(d, &branch_depth); + break; + case RC_OPCODE_ENDLOOP: + if (d->ReaderData->LoopDepth > 0) { + d->ReaderData->LoopDepth--; + if (d->ReaderData->LoopDepth == 0) { + d->ReaderData->AbortOnWrite = 0; + } + pop_branch_mask(d, &branch_depth); + } else { + /* Here we have reached an ENDLOOP without + * seeing its BGNLOOP. This means that + * the writer was written inside of a loop, + * so it could have readers that are above it + * (i.e.
they have a lower IP). To find these + * readers we jump to the BGNLOOP instruction + * and check each instruction until we get + * back to the writer. + */ + endloop = tmp; + tmp = rc_match_endloop(tmp); + if (!tmp) { + rc_error(d->C, "Failed to match endloop.\n"); + d->ReaderData->Abort = 1; + return; + } + abort_on_read_at_endloop = d->ReaderData->AbortOnRead; + d->ReaderData->AbortOnRead |= d->AliveWriteMask; + continue; + } + break; + case RC_OPCODE_IF: + push_branch_mask(d, &branch_depth); + break; + case RC_OPCODE_ELSE: + if (branch_depth == 0) { + d->ReaderData->InElse = 1; + } else { + unsigned int temp_mask = d->AliveWriteMask; + d->AliveWriteMask = + d->BranchMasks[branch_depth].IfWriteMask; + d->BranchMasks[branch_depth].ElseWriteMask = + temp_mask; + d->BranchMasks[branch_depth].HasElse = 1; + } + break; + case RC_OPCODE_ENDIF: + if (branch_depth == 0) { + d->ReaderData->AbortOnRead = d->AliveWriteMask; + d->ReaderData->InElse = 0; + } + else { + pop_branch_mask(d, &branch_depth); + } + break; + default: + break; + } + + if (d->ReaderData->InElse) + continue; + + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + rc_for_all_reads_src(tmp, + get_readers_normal_read_callback, d); + } else { + rc_pair_for_all_reads_arg(tmp, + get_readers_pair_read_callback, d); + } + + /* This can happen when we jump from an ENDLOOP to BGNLOOP */ + if (tmp == writer) { + tmp = endloop; + endloop = NULL; + d->ReaderData->AbortOnRead = abort_on_read_at_endloop; + continue; + } + rc_for_all_writes_mask(tmp, get_readers_write_callback, d); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + if (branch_depth == 0 && !d->AliveWriteMask) + return; + } +} + +static void init_get_readers_callback_data( + struct get_readers_callback_data * d, + struct rc_reader_data * reader_data, + struct radeon_compiler * c, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + reader_data->Abort = 0; + reader_data->ReaderCount 
= 0; + reader_data->ReadersReserved = 0; + reader_data->Readers = NULL; + + d->C = c; + d->ReaderData = reader_data; + d->ReadNormalCB = read_normal_cb; + d->ReadPairCB = read_pair_cb; + d->WriteCB = write_cb; +} + +/** + * This function will create a list of readers via the rc_reader_data struct. + * This function will abort (set the flag data->Abort) and return if it + * encounters an instruction that reads from @param writer and also a different + * instruction. Here are some examples: + * + * writer = instruction 0; + * 0 MOV TEMP[0].xy, TEMP[1].xy + * 1 MOV TEMP[0].zw, TEMP[2].xy + * 2 MOV TEMP[3], TEMP[0] + * The Abort flag will be set on instruction 2, because it reads values written + * by instructions 0 and 1. + * + * writer = instruction 1; + * 0 IF TEMP[0].x + * 1 MOV TEMP[1], TEMP[2] + * 2 ELSE + * 3 MOV TEMP[1], TEMP[2] + * 4 ENDIF + * 5 MOV TEMP[3], TEMP[1] + * The Abort flag will be set on instruction 5, because it could read from the + * value written by either instruction 1 or 3, depending on the jump decision + * made at instruction 0. + * + * writer = instruction 0; + * 0 MOV TEMP[0], TEMP[1] + * 2 BGNLOOP + * 3 ADD TEMP[0], TEMP[0], none.1 + * 4 ENDLOOP + * The Abort flag will be set on instruction 3, because in the first iteration + * of the loop it reads the value written by instruction 0 and in all other + * iterations it reads the value written by instruction 3. + * + * @param read_normal_cb, read_pair_cb These functions will be called for every + * normal (resp. pair) instruction that has been determined to be a reader of writer. + * @param write_cb This function will be called for every instruction after + * writer.
+ */ +void rc_get_readers( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + struct get_readers_callback_data d; + + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); + + rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); +} + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + struct get_readers_callback_data d; + + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); + + if (sub_writer->WriteMask) { + get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY, + sub_writer->DestIndex, sub_writer->WriteMask); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h new file mode 100644 index 00000000000..d8a627258ea --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_DATAFLOW_H +#define RADEON_DATAFLOW_H + +#include "radeon_program_constants.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_swizzle_caps; +struct rc_src_register; +struct rc_pair_instruction_arg; +struct rc_pair_instruction_source; +struct rc_pair_sub_instruction; +struct rc_compiler; + + +/** + * Help analyze and modify the register accesses of instructions. + */ +/*@{*/ +typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan); +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); + +typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); + +typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst, + struct rc_src_register * src); +void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, + void * userdata); + +typedef void (*rc_pair_read_arg_fn)(void * userdata, + struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, + struct 
rc_pair_instruction_source * src); +void rc_pair_for_all_reads_arg(struct rc_instruction * inst, + rc_pair_read_arg_fn cb, void * userdata); + +typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex); +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); +/*@}*/ + +struct rc_reader { + struct rc_instruction * Inst; + unsigned int WriteMask; + union { + struct { + struct rc_src_register * Src; + } I; + struct { + struct rc_pair_instruction_arg * Arg; + struct rc_pair_instruction_source * Src; + } P; + } U; +}; + +struct rc_reader_data { + unsigned int Abort; + unsigned int AbortOnRead; + unsigned int AbortOnWrite; + unsigned int LoopDepth; + unsigned int InElse; + struct rc_instruction * Writer; + + unsigned int ReaderCount; + unsigned int ReadersReserved; + struct rc_reader * Readers; + + /* If this flag is enabled, rc_get_readers will exit as soon as possible + * after the Abort flag is set.*/ + unsigned int ExitOnAbort; + void * CbData; +}; + +void rc_get_readers( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); +/** + * Compiler passes based on dataflow analysis.
+ */ +/*@{*/ +typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, + void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); +void rc_dataflow_deadcode(struct radeon_compiler * c, void *user); +void rc_dataflow_swizzles(struct radeon_compiler * c, void *user); +/*@}*/ + +void rc_optimize(struct radeon_compiler * c, void *user); + +#endif /* RADEON_DATAFLOW_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c new file mode 100644 index 00000000000..678e1475883 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +struct updatemask_state { /* one mask byte per register: bits are set by mark_used() and cleared by update_instruction() for the channels an instruction writes */ + unsigned char Output[RC_REGISTER_MAX_INDEX]; + unsigned char Temporary[RC_REGISTER_MAX_INDEX]; + unsigned char Address; + unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; +}; + +struct instruction_state { /* per-instruction result, indexed by inst->IP */ + unsigned char WriteMask:4; /* destination channels found to be needed */ + unsigned char WriteALUResult:1; /* set when the ALU result written by the instruction is needed */ + unsigned char SrcReg[3]; /* source channel masks already accounted for */ +}; + +struct loopinfo { /* copies of the masks taken at each BRK, see push_break() */ + struct updatemask_state * Breaks; + unsigned int BreakCount; + unsigned int BreaksReserved; +}; + +struct branchinfo { /* masks saved while walking backwards over ENDIF / ELSE */ + unsigned int HaveElse:1; + + struct updatemask_state StoreEndif; + struct updatemask_state StoreElse; +}; + +struct deadcode_state { + struct radeon_compiler * C; + struct instruction_state * Instructions; + + struct updatemask_state R; /* current masks of the backward walk */ + + struct branchinfo * BranchStack; + unsigned int BranchStackSize; + unsigned int BranchStackReserved; + + struct loopinfo * LoopStack; + unsigned int LoopStackSize; + unsigned int LoopStackReserved; +}; + + +static void or_updatemasks( + struct updatemask_state * dst, + struct updatemask_state * a, + struct updatemask_state * b) /* dst = a | b for every mask byte; rc_dataflow_deadcode() calls it with dst == a */ +{ + for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { + dst->Output[i] = a->Output[i] | b->Output[i]; + dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; + } + + for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) + dst->Special[i] = a->Special[i] | b->Special[i]; + + dst->Address = a->Address | b->Address; +} + +static void push_break(struct deadcode_state *s) /* append a copy of the current masks (s->R) to the Breaks array of the loop on top of LoopStack */ +{ + struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; /* NOTE(review): no check for an empty LoopStack before indexing */ + memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, + loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); + + memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); +} + +static void push_loop(struct deadcode_state * s) /* push a zero-initialised loopinfo onto LoopStack */ +{ + memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, + s->LoopStackSize, s->LoopStackReserved, 1); + memset(&s->LoopStack[s->LoopStackSize++], 0, 
sizeof(struct loopinfo)); +} + +static void push_branch(struct deadcode_state * s) /* push a branchinfo with HaveElse cleared and StoreEndif set to a copy of the current masks */ +{ + struct branchinfo * branch; + + memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, + s->BranchStackSize, s->BranchStackReserved, 1); + + branch = &s->BranchStack[s->BranchStackSize++]; + branch->HaveElse = 0; + memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); +} + +static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) /* address of the mask byte for (file, index) in s->R; 0 after rc_error() for an out-of-range index, and 0 for any other register file */ +{ + if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); + return 0; + } + + if (file == RC_FILE_OUTPUT) + return &s->R.Output[index]; + else + return &s->R.Temporary[index]; + } else if (file == RC_FILE_ADDRESS) { + return &s->R.Address; + } else if (file == RC_FILE_SPECIAL) { + if (index >= RC_NUM_SPECIAL_REGISTERS) { + rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->R.Special[index]; + } + + return 0; +} + +static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) /* OR mask into the mask byte for (file, index) when get_used_ptr() yields one */ +{ + unsigned char * pused = get_used_ptr(s, file, index); + if (pused) + *pused |= mask; +} + +static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) /* one step of the backward walk: move the needed bits this instruction writes from s->R into its instruction_state, then mark the source channels feeding those bits as used */ +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + struct instruction_state * insts = &s->Instructions[inst->IP]; + unsigned int usedmask = 0; + unsigned int srcmasks[3]; + + if (opcode->HasDstReg) { + unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); + if (pused) { + usedmask = *pused & inst->U.I.DstReg.WriteMask; + *pused &= ~usedmask; /* these channels are produced here, so they are no longer pending */ + } + } + + insts->WriteMask |= usedmask; + + if (inst->U.I.WriteALUResult) { + unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); + if (pused && *pused) { + if 
(inst->U.I.WriteALUResult == RC_ALURESULT_X) + usedmask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usedmask |= RC_MASK_W; + + *pused = 0; + insts->WriteALUResult = 1; + } + } + + rc_compute_sources_for_writemask(inst, usedmask, srcmasks); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; /* only channels not recorded on an earlier visit */ + insts->SrcReg[src] |= newsrcmask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(newsrcmask, chan)) + refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + } + + /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ + refmask &= RC_MASK_XYZW; + + if (!refmask) + continue; + + mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); + + if (inst->U.I.SrcReg[src].RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void mark_output_use(void * data, unsigned int index, unsigned int mask) /* callback handed to the user-supplied function; marks channels of output register index as used */ +{ + struct deadcode_state * s = data; + + mark_used(s, RC_FILE_OUTPUT, index, mask); +} + +void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) /* dead-code pass: user is an rc_dataflow_mark_outputs_fn that seeds the used outputs; instructions are walked last-to-first to collect needed channels, then first-to-last to trim write masks, remove instructions with no needed result and set unneeded source swizzle channels to RC_SWIZZLE_UNUSED */ +{ + struct deadcode_state s; + unsigned int nr_instructions; + rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; + unsigned int ip; + + memset(&s, 0, sizeof(s)); + s.C = c; + + nr_instructions = rc_recompute_ips(c); + s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); + memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); + + dce(c, &s, &mark_output_use); + + for(struct rc_instruction * inst = c->Program.Instructions.Prev; + inst != &c->Program.Instructions; + inst = inst->Prev) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + switch(opcode->Opcode){ + /* Mark all sources in the loop body as used before doing + * normal deadcode analysis. This is probably not optimal. 
+ */ + case RC_OPCODE_ENDLOOP: + { + int endloops = 1; + struct rc_instruction *ptr; + for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ /* scan back to the matching BGNLOOP, counting nested loops */ + opcode = rc_get_opcode_info(ptr->U.I.Opcode); + if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + endloops--; + continue; + } + if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ + endloops++; + continue; + } + if(opcode->HasDstReg){ + int src = 0; + unsigned int srcmasks[3]; + rc_compute_sources_for_writemask(ptr, + ptr->U.I.DstReg.WriteMask, srcmasks); + for(src=0; src < opcode->NumSrcRegs; src++){ + mark_used(&s, + ptr->U.I.SrcReg[src].File, + ptr->U.I.SrcReg[src].Index, + srcmasks[src]); + } + } + } + push_loop(&s); + break; + } + case RC_OPCODE_BRK: + push_break(&s); + break; + case RC_OPCODE_BGNLOOP: /* merge the masks saved at every BRK of the top loop into s.R */ + { + unsigned int i; + struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; /* NOTE(review): the entry is not popped here; LoopStackSize is only ever incremented, by push_loop() */ + for(i = 0; i < loop->BreakCount; i++) { + or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); + } + break; + } + case RC_OPCODE_CONT: + break; + case RC_OPCODE_ENDIF: /* walking backwards, ENDIF is where a branch is opened */ + push_branch(&s); + break; + default: + if (opcode->IsFlowControl && s.BranchStackSize) { + struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, + &s.R, + branch->HaveElse ? 
&branch->StoreElse : &branch->StoreEndif); + + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); + } else { + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); /* keep the masks needed by the else side */ + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); /* restart from the ENDIF masks for the if side */ + branch->HaveElse = 1; + } + } else { + rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); + } + } + } + + update_instruction(&s, inst); + } + + ip = 0; + for(struct rc_instruction * inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next, ++ip) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int dead = 1; + unsigned int srcmasks[3]; + unsigned int usemask; + + if (!opcode->HasDstReg) { + dead = 0; + } else { + inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; + if (s.Instructions[ip].WriteMask) + dead = 0; + + if (s.Instructions[ip].WriteALUResult) + dead = 0; + else + inst->U.I.WriteALUResult = RC_ALURESULT_NONE; + } + + if (dead) { + struct rc_instruction * todelete = inst; + inst = inst->Prev; /* step back so the loop increment lands on the instruction after the removed one */ + rc_remove_instruction(todelete); + continue; + } + + usemask = s.Instructions[ip].WriteMask; + + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usemask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usemask |= RC_MASK_W; + + rc_compute_sources_for_writemask(inst, usemask, srcmasks); + + for(unsigned int src = 0; src < 3; ++src) { + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + } + + rc_calculate_inputs_outputs(c); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c new file mode 100644 index 00000000000..133a9f72ec7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c @@ -0,0 +1,103 
@@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static void rewrite_source(struct radeon_compiler * c, + struct rc_instruction * inst, unsigned src) /* replace source operand src of inst by a fresh temporary that is filled by one MOV per phase reported by SwizzleCaps->Split() */ +{ + struct rc_swizzle_split split; + unsigned int tempreg = rc_find_free_temporary(c); + unsigned int usemask; + + usemask = 0; /* channels of the operand whose swizzle is not RC_SWIZZLE_UNUSED */ + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) + usemask |= 1 << chan; + } + + c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); + + for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { + struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); + unsigned int phase_refmask; /* NOTE(review): computed below but never read in this function */ + unsigned int masked_negate; + + mov->U.I.Opcode = RC_OPCODE_MOV; + mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + mov->U.I.DstReg.Index = tempreg; + mov->U.I.DstReg.WriteMask = split.Phase[phase]; + mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + mov->U.I.PreSub = inst->U.I.PreSub; + + phase_refmask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(split.Phase[phase], chan)) + SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); + else + phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); + } + + phase_refmask &= RC_MASK_XYZW; + + masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; /* negate bits of the channels this phase writes */ + if (masked_negate == 0) + mov->U.I.SrcReg[0].Negate = 0; + else if (masked_negate == split.Phase[phase]) + mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + + } + + inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[src].Index = tempreg; + inst->U.I.SrcReg[src].Swizzle = 0; + inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; + inst->U.I.SrcReg[src].Abs = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { /* identity swizzle on the used channels, UNUSED elsewhere */ + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, + GET_BIT(usemask, chan) ? 
chan : RC_SWIZZLE_UNUSED); + } +} + +void rc_dataflow_swizzles(struct radeon_compiler * c, void *user) /* call rewrite_source() for every source operand that SwizzleCaps->IsNative() rejects; user is not used */ +{ + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int src; + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + rewrite_source(c, inst, src); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c new file mode 100644 index 00000000000..7bede344f30 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c @@ -0,0 +1,342 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "radeon_emulate_branches.h" + +#include + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct proxy_info { + unsigned int Proxied:1; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct register_proxies { + struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; +}; + +struct branch_info { + struct rc_instruction * If; + struct rc_instruction * Else; +}; + +struct emulate_branch_state { + struct radeon_compiler * C; + + struct branch_info * Branches; + unsigned int BranchCount; + unsigned int BranchReserved; +}; + + +static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + struct rc_instruction * inst_mov; + + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->BranchCount, s->BranchReserved, 1); + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount++]; + memset(branch, 0, sizeof(struct branch_info)); + branch->If = inst; + + /* Make a safety copy of the decision register, because we will need + * it at ENDIF time and it might be overwritten in both branches. 
*/ + inst_mov = rc_insert_new_instruction(s->C, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C); + inst_mov->U.I.DstReg.WriteMask = RC_MASK_X; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + inst->U.I.SrcReg[0].Swizzle = 0; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].Negate = 0; +} + +static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + + if (!s->BranchCount) { + rc_error(s->C, "Encountered ELSE outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount - 1]; + branch->Else = inst; +} + + +struct state_and_proxies { + struct emulate_branch_state * S; + struct register_proxies * Proxies; +}; + +static struct proxy_info * get_proxy_info(struct state_and_proxies * sap, + rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_TEMPORARY) { + return &sap->Proxies->Temporary[index]; + } else { + return 0; + } +} + +static void scan_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int comp) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, file, index); + + if (proxy && !proxy->Proxied) { + proxy->Proxied = 1; + proxy->Index = rc_find_free_temporary(sap->S->C); + } +} + +static void remap_proxy_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex); + + if (proxy && proxy->Proxied) { + *pfile = RC_FILE_TEMPORARY; + *pindex = proxy->Index; + } +} + +/** + * Redirect all writes in the instruction range [begin, end) to proxy + * temporary 
registers. + */ +static void allocate_and_insert_proxies(struct emulate_branch_state * s, + struct register_proxies * proxies, + struct rc_instruction * begin, + struct rc_instruction * end) +{ + struct state_and_proxies sap; + + sap.S = s; + sap.Proxies = proxies; + + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + rc_for_all_writes_mask(inst, scan_write, &sap); /* allocate proxies for registers written here */ + rc_remap_registers(inst, remap_proxy_function, &sap); /* then point the instruction at the proxies */ + } + + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { /* one MOV per proxied temporary, copying the original register into its proxy */ + if (proxies->Temporary[index].Proxied) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = index; + } + } +} + + +static void inject_cmp(struct emulate_branch_state * s, + struct rc_instruction * inst_if, + struct rc_instruction * inst_endif, + rc_register_file file, unsigned int index, + struct proxy_info ifproxy, + struct proxy_info elseproxy) /* create a CMP at inst_endif that writes (file, index): source 0 is the IF condition with Abs and full Negate set, source 1 the if-side value, source 2 the else-side value; an unproxied side reads the original register */ +{ + struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.DstReg.File = file; + inst_cmp->U.I.DstReg.Index = index; + inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + inst_cmp->U.I.SrcReg[0].Abs = 1; + inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index; + inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? 
elseproxy.Index : index; +} + +static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst) /* close the innermost branch: proxy the writes of the IF side and, when present, the ELSE side, emit one CMP per temporary proxied on either side, then remove the IF, ELSE and ENDIF instructions */ +{ + struct branch_info * branch; + struct register_proxies IfProxies; + struct register_proxies ElseProxies; + + if (!s->BranchCount) { + rc_error(s->C, "Encountered ENDIF outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount - 1]; + + memset(&IfProxies, 0, sizeof(IfProxies)); + memset(&ElseProxies, 0, sizeof(ElseProxies)); + + allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst); /* the IF side ends at ELSE when there is one, otherwise at ENDIF */ + + if (branch->Else) + allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst); + + /* Insert the CMP instructions at the end. */ + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) { + inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index, + IfProxies.Temporary[index], ElseProxies.Temporary[index]); + } + } + + /* Remove all traces of the branch instructions */ + rc_remove_instruction(branch->If); + if (branch->Else) + rc_remove_instruction(branch->Else); + rc_remove_instruction(inst); + + s->BranchCount--; + + if (VERBOSE) { + DBG("Program after ENDIF handling:\n"); + rc_print_program(&s->C->Program); + } +} + + +struct remap_output_data { /* output register index to replace and the temporary that replaces it */ + unsigned int Output:RC_REGISTER_INDEX_BITS; + unsigned int Temporary:RC_REGISTER_INDEX_BITS; +}; + +static void remap_output_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) /* remap callback: rewrite output register data->Output to temporary data->Temporary */ +{ + struct remap_output_data * data = userdata; + + if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) { + *pfile = RC_FILE_TEMPORARY; + *pindex = data->Temporary; + } +} + + +/** + * Output registers cannot be read from and so cannot be dealt with like + * temporary registers. 
+ * + * We do the simplest thing: If an output register is written within + * a branch, then *all* writes to this register are proxied to a + * temporary register, and a final MOV is appended to the end of + * the program. + */ +static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode; + + if (!s->BranchCount) /* nothing to do outside of branches */ + return; + + opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (!opcode->HasDstReg) + return; + + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) { + struct remap_output_data remap; + struct rc_instruction * inst_mov; + + remap.Output = inst->U.I.DstReg.Index; + remap.Temporary = rc_find_free_temporary(s->C); + + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; /* NOTE(review): this loop variable shadows the inst parameter; the remap covers the whole program */ + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_output_function, &remap); + } + + inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); /* final copy from the temporary back to the output register */ + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT; + inst_mov->U.I.DstReg.Index = remap.Output; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = remap.Temporary; + } +} + +/** + * Remove branch instructions; instead, execute both branches + * on different register sets and choose between their results + * using CMP instructions in place of the original ENDIF. 
+ */ +void rc_emulate_branches(struct radeon_compiler *c, void *user) /* user is not used */ +{ + struct emulate_branch_state s; + struct rc_instruction * ptr; + + memset(&s, 0, sizeof(s)); + s.C = c; + + /* Untypical loop because we may remove the current instruction */ + ptr = c->Program.Instructions.Next; + while(ptr != &c->Program.Instructions) { + struct rc_instruction * inst = ptr; + ptr = ptr->Next; /* advance before the handlers run, so removing inst does not break the iteration */ + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + switch(inst->U.I.Opcode) { + case RC_OPCODE_IF: + handle_if(&s, inst); + break; + case RC_OPCODE_ELSE: + handle_else(&s, inst); + break; + case RC_OPCODE_ENDIF: + handle_endif(&s, inst); + break; + default: /* every other opcode: proxy output writes made inside a branch */ + fix_output_writes(&s, inst); + break; + } + } else { + rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h new file mode 100644 index 00000000000..818ab84d0cd --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h @@ -0,0 +1,30 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_EMULATE_BRANCHES_H +#define RADEON_EMULATE_BRANCHES_H + +struct radeon_compiler; + +void rc_emulate_branches(struct radeon_compiler *c, void *user); + +#endif /* RADEON_EMULATE_BRANCHES_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c new file mode 100644 index 00000000000..205eecd1129 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c @@ -0,0 +1,522 @@ +/* + * Copyright 2010 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +/** + * \file + */ + +#include "radeon_emulate_loops.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct const_value { + struct radeon_compiler * C; + struct rc_src_register * Src; + float Value; + int HasValue; +}; + +struct count_inst { + struct radeon_compiler * C; + int Index; + rc_swizzle Swz; + float Amount; + int Unknown; +}; + +static float get_constant_value(struct radeon_compiler * c, + struct rc_src_register * src, + int chan) +{ + float base = 1.0f; + int swz = GET_SWZ(src->Swizzle, chan); + if(swz >= 4 || src->Index >= c->Program.Constants.Count ){ + rc_error(c, "get_constant_value: Can't find a value.\n"); + return 0.0f; + } + if(GET_BIT(src->Negate, chan)){ + base = -1.0f; + } + return base * + c->Program.Constants.Constants[src->Index].u.Immediate[swz]; +} + +static int src_reg_is_immediate(struct rc_src_register * src, + struct radeon_compiler * c) +{ + return src->File == RC_FILE_CONSTANT && + c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; +} + +static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, + struct loop_info * loop) +{ + unsigned int total_i = rc_recompute_ips(c); + unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; + /* +1 because the program already has one iteration of the loop. 
*/ + return 1 + ((c->max_alu_insts - total_i) / loop_i); +} + +static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int iterations) +{ + unsigned int i; + struct rc_instruction * ptr; + struct rc_instruction * first = loop->BeginLoop->Next; + struct rc_instruction * last = loop->EndLoop->Prev; + struct rc_instruction * append_to = last; + rc_remove_instruction(loop->BeginLoop); + rc_remove_instruction(loop->EndLoop); + for( i = 1; i < iterations; i++){ + for(ptr = first; ptr != last->Next; ptr = ptr->Next){ + struct rc_instruction *new = rc_alloc_instruction(c); + memcpy(new, ptr, sizeof(struct rc_instruction)); + rc_insert_instruction(append_to, new); + append_to = new; + } + } +} + + +static void update_const_value(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct const_value * value = data; + if(value->Src->File != file || + value->Src->Index != index || + !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_MOV: + if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){ + return; + } + value->HasValue = 1; + value->Value = + get_constant_value(value->C, &inst->U.I.SrcReg[0], 0); + break; + } +} + +static void get_incr_amount(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct count_inst * count_inst = data; + int amnt_src_index; + const struct rc_opcode_info * opcode; + float amount; + + if(file != RC_FILE_TEMPORARY || + count_inst->Index != index || + (1 << GET_SWZ(count_inst->Swz,0) != mask)){ + return; + } + /* Find the index of the counter register. 
*/ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + if(opcode->NumSrcRegs != 2){ + count_inst->Unknown = 1; + return; + } + if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[0].Index == count_inst->Index && + inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ + amnt_src_index = 1; + } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[1].Index == count_inst->Index && + inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ + amnt_src_index = 0; + } + else{ + count_inst->Unknown = 1; + return; + } + if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index], + count_inst->C)){ + amount = get_constant_value(count_inst->C, + &inst->U.I.SrcReg[amnt_src_index], 0); + } + else{ + count_inst->Unknown = 1 ; + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + count_inst->Amount += amount; + break; + case RC_OPCODE_SUB: + if(amnt_src_index == 0){ + count_inst->Unknown = 0; + return; + } + count_inst->Amount -= amount; + break; + default: + count_inst->Unknown = 1; + return; + } +} + +/** + * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless + * of how many iterations they have. 
+ */ +static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop) /* returns 1 when the loop was unrolled, 0 when it was left untouched */ +{ + int end_loops; + int iterations; + struct count_inst count_inst; + float limit_value; + struct rc_src_register * counter; + struct rc_src_register * limit; + struct const_value counter_value; + struct rc_instruction * inst; + + /* Find the counter and the upper limit */ + + if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){ + limit = &loop->Cond->U.I.SrcReg[0]; + counter = &loop->Cond->U.I.SrcReg[1]; + } + else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){ + limit = &loop->Cond->U.I.SrcReg[1]; + counter = &loop->Cond->U.I.SrcReg[0]; + } + else{ + DBG("No constant limit.\n"); + return 0; + } + + /* Find the initial value of the counter */ + counter_value.Src = counter; + counter_value.Value = 0.0f; + counter_value.HasValue = 0; + counter_value.C = c; + for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; + inst = inst->Next){ + rc_for_all_writes_mask(inst, update_const_value, &counter_value); + } + if(!counter_value.HasValue){ + DBG("Initial counter value cannot be determined.\n"); + return 0; + } + DBG("Initial counter value is %f\n", counter_value.Value); + /* Determine how the counter is modified each loop */ + count_inst.C = c; + count_inst.Index = counter->Index; + count_inst.Swz = counter->Swizzle; + count_inst.Amount = 0.0f; + count_inst.Unknown = 0; + end_loops = 1; + for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ + switch(inst->U.I.Opcode){ + /* XXX In the future we might want to try to unroll nested + * loops here.*/ + case RC_OPCODE_BGNLOOP: + end_loops++; + break; + case RC_OPCODE_ENDLOOP: + loop->EndLoop = inst; + end_loops--; + break; + case RC_OPCODE_BRK: + /* Don't unroll loops if it has a BRK instruction + * other than the one used when testing the main conditional + * of the loop. */ + + /* Make sure we haven't entered a nested loop. 
*/ + if(inst != loop->Brk && end_loops == 1) { + return 0; + } + break; + /* XXX Check if the counter is modified within an if statement. + */ + case RC_OPCODE_IF: + break; + default: + rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); + if(count_inst.Unknown){ + return 0; + } + break; + } + } + /* Infinite loop */ + if(count_inst.Amount == 0.0f){ + return 0; + } + DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); + /* Calculate the number of iterations of this loop. Keeping this + * simple, since we only support increment and decrement loops. + */ + limit_value = get_constant_value(c, limit, 0); + DBG("Limit is %f.\n", limit_value); + /* The iteration calculations are opposite of what you would expect. + * In a normal loop, if the condition is met, then loop continues, but + * with our loops, if the condition is met, the loop is exited. */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + case RC_OPCODE_SLE: + iterations = (int) ceilf((limit_value - counter_value.Value) / + count_inst.Amount); + break; + + case RC_OPCODE_SGT: + case RC_OPCODE_SLT: + iterations = (int) floorf((limit_value - counter_value.Value) / + count_inst.Amount) + 1; + break; + default: + return 0; + } + + if (c->max_alu_insts > 0 + && iterations > loop_max_possible_iterations(c, loop)) { + return 0; + } + + DBG("Loop will have %d iterations.\n", iterations); + + /* Prepare loop for unrolling */ + rc_remove_instruction(loop->Cond); + rc_remove_instruction(loop->If); + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + + unroll_loop(c, loop, iterations); + loop->EndLoop = NULL; + return 1; +} + +/** + * @param c + * @param loop + * @param inst A pointer to a BGNLOOP instruction. + * @return 1 if all of the members of loop were set. + * @return 0 if there was an error and some members of loop are still NULL. 
+ */ +static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, + struct rc_instruction * inst) +{ + struct rc_instruction * ptr; + + if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ + rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); + return 0; + } + + memset(loop, 0, sizeof(struct loop_info)); + + loop->BeginLoop = inst; + + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { + + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", + __FUNCTION__); + return 0; + } + + switch(ptr->U.I.Opcode){ + case RC_OPCODE_BGNLOOP: + { + /* Nested loop, skip ahead to the end. */ + unsigned int loop_depth = 1; + for(ptr = ptr->Next; ptr != &c->Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + loop_depth++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + if (!--loop_depth) { + break; + } + } + } + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", + __FUNCTION__); + return 0; + } + break; + } + case RC_OPCODE_BRK: + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF + || ptr->Prev->U.I.Opcode != RC_OPCODE_IF + || loop->Brk){ + continue; + } + loop->Brk = ptr; + loop->If = ptr->Prev; + loop->EndIf = ptr->Next; + switch(loop->If->Prev->U.I.Opcode){ + case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLE: + case RC_OPCODE_SEQ: + case RC_OPCODE_SNE: + break; + default: + return 0; + } + loop->Cond = loop->If->Prev; + break; + + case RC_OPCODE_ENDLOOP: + loop->EndLoop = ptr; + break; + } + } + + if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf + && loop->Cond && loop->EndLoop) { + return 1; + } + return 0; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. 
Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * -> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> + * -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. + * @return 1 for success, 0 for failure + */ +static int transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst) +{ + struct loop_info * loop; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + + if (!build_loop_info(s->C, loop, inst)) { + rc_error(s->C, "Failed to build loop info\n"); + return 0; + } + + if(try_unroll_loop(s->C, loop)){ + return 1; + } + + /* Reverse the conditional instruction */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + loop->Cond->U.I.Opcode = RC_OPCODE_SLT; + break; + case RC_OPCODE_SLT: + loop->Cond->U.I.Opcode = RC_OPCODE_SGE; + break; + case RC_OPCODE_SLE: + loop->Cond->U.I.Opcode = RC_OPCODE_SGT; + break; + case RC_OPCODE_SGT: + loop->Cond->U.I.Opcode = RC_OPCODE_SLE; + break; + case RC_OPCODE_SEQ: + loop->Cond->U.I.Opcode = RC_OPCODE_SNE; + break; + case RC_OPCODE_SNE: + loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; + break; + default: + rc_error(s->C, "loop->Cond is not a conditional.\n"); + return 0; + } + + /* Prepare the loop to be emulated */ + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); + return 1; +} + +void rc_transform_loops(struct radeon_compiler *c, void *user) +{ + struct emulate_loop_state * s = &c->loop_state; + struct rc_instruction * ptr; + + memset(s, 0, sizeof(struct emulate_loop_state)); + s->C = c; + for(ptr = s->C->Program.Instructions.Next; + ptr != &s->C->Program.Instructions; ptr = ptr->Next) { + if(ptr->Type == RC_INSTRUCTION_NORMAL && + ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + if (!transform_loop(s, 
ptr)) + return; + } + } +} + +void rc_unroll_loops(struct radeon_compiler *c, void *user) +{ + struct rc_instruction * inst; + struct loop_info loop; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + if (build_loop_info(c, &loop, inst)) { + try_unroll_loop(c, &loop); + } + } + } +} + +void rc_emulate_loops(struct radeon_compiler *c, void *user) +{ + struct emulate_loop_state * s = &c->loop_state; + int i; + /* Iterate backwards over the list of loops so that nested + * loops are unrolled first. + */ + for( i = s->LoopCount - 1; i >= 0; i-- ){ + unsigned int iterations; + + if(!s->Loops[i].EndLoop){ + continue; + } + iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); + unroll_loop(s->C, &s->Loops[i], iterations); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h new file mode 100644 index 00000000000..cd800c059d9 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h @@ -0,0 +1,32 @@ + + +#ifndef RADEON_EMULATE_LOOPS_H +#define RADEON_EMULATE_LOOPS_H + +#define MAX_ITERATIONS 8 + +struct radeon_compiler; + +struct loop_info { + struct rc_instruction * BeginLoop; + struct rc_instruction * Cond; + struct rc_instruction * If; + struct rc_instruction * Brk; + struct rc_instruction * EndIf; + struct rc_instruction * EndLoop; +}; + +struct emulate_loop_state { + struct radeon_compiler * C; + struct loop_info * Loops; + unsigned int LoopCount; + unsigned int LoopReserved; +}; + +void rc_transform_loops(struct radeon_compiler *c, void *user); + +void rc_unroll_loops(struct radeon_compiler * c, void *user); + +void rc_emulate_loops(struct radeon_compiler * c, void *user); + +#endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_list.c b/src/gallium/drivers/r300/compiler/radeon_list.c new file mode 100644 index 
00000000000..811c908a81a --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_list.c @@ -0,0 +1,90 @@ +/* + * Copyright 2011 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_list.h" + +#include +#include + +#include "memory_pool.h" + +struct rc_list * rc_list(struct memory_pool * pool, void * item) +{ + struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list)); + new->Item = item; + new->Next = NULL; + new->Prev = NULL; + + return new; +} + +void rc_list_add(struct rc_list ** list, struct rc_list * new_value) +{ + struct rc_list * temp; + + if (*list == NULL) { + *list = new_value; + return; + } + + for (temp = *list; temp->Next; temp = temp->Next); + + temp->Next = new_value; + new_value->Prev = temp; +} + +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value) +{ + if (*list == rm_value) { + *list = rm_value->Next; + return; + } + + rm_value->Prev->Next = rm_value->Next; + if (rm_value->Next) { + rm_value->Next->Prev = rm_value->Prev; + } +} + +unsigned int rc_list_count(struct rc_list * list) +{ + unsigned int count = 0; + while (list) { + count++; + list = list->Next; + } + return count; +} + +void rc_list_print(struct rc_list * list) +{ + while(list) { + fprintf(stderr, "%p->", list->Item); + list = list->Next; + } + fprintf(stderr, "\n"); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_list.h b/src/gallium/drivers/r300/compiler/radeon_list.h new file mode 100644 index 00000000000..b3c8f89cc68 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_list.h @@ -0,0 +1,46 @@ +/* + * Copyright 2011 Tom Stellard + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_LIST_H +#define RADEON_LIST_H + +struct memory_pool; + +struct rc_list { + void * Item; + struct rc_list * Prev; + struct rc_list * Next; +}; + +struct rc_list * rc_list(struct memory_pool * pool, void * item); +void rc_list_add(struct rc_list ** list, struct rc_list * new_value); +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value); +unsigned int rc_list_count(struct rc_list * list); +void rc_list_print(struct rc_list * list); + +#endif /* RADEON_LIST_H */ + diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c new file mode 100644 index 00000000000..afd78ad79dd --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c @@ -0,0 +1,546 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_opcodes.h" +#include "radeon_program.h" + +#include "radeon_program_constants.h" + +struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { + { + .Opcode = RC_OPCODE_NOP, + .Name = "NOP" + }, + { + .Opcode = RC_OPCODE_ILLEGAL_OPCODE, + .Name = "ILLEGAL OPCODE" + }, + { + .Opcode = RC_OPCODE_ABS, + .Name = "ABS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ADD, + .Name = "ADD", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ARL, + .Name = "ARL", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_CEIL, + .Name = "CEIL", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CLAMP, + .Name = "CLAMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CMP, + .Name = "CMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CND, + .Name = "CND", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_COS, + .Name = "COS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_DDX, + .Name = "DDX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DDY, + .Name = "DDY", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DP2, + .Name = "DP2", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP3, + .Name = "DP3", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP4, + .Name = "DP4", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DPH, + .Name = "DPH", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DST, + .Name = "DST", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_EX2, + .Name = "EX2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_EXP, + 
.Name = "EXP", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_FLR, + .Name = "FLR", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_FRC, + .Name = "FRC", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_KIL, + .Name = "KIL", + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_LG2, + .Name = "LG2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_LIT, + .Name = "LIT", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LOG, + .Name = "LOG", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LRP, + .Name = "LRP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAD, + .Name = "MAD", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAX, + .Name = "MAX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MIN, + .Name = "MIN", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MOV, + .Name = "MOV", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MUL, + .Name = "MUL", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_POW, + .Name = "POW", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RCP, + .Name = "RCP", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RSQ, + .Name = "RSQ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SCS, + .Name = "SCS", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SEQ, + .Name = "SEQ", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SFL, + .Name = "SFL", + .NumSrcRegs = 0, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = 
RC_OPCODE_SGE, + .Name = "SGE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGT, + .Name = "SGT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SIN, + .Name = "SIN", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SLE, + .Name = "SLE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SLT, + .Name = "SLT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SNE, + .Name = "SNE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SSG, + .Name = "SSG", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SUB, + .Name = "SUB", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SWZ, + .Name = "SWZ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_XPD, + .Name = "XPD", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TEX, + .Name = "TEX", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXB, + .Name = "TXB", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXD, + .Name = "TXD", + .HasTexture = 1, + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXL, + .Name = "TXL", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXP, + .Name = "TXP", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_IF, + .Name = "IF", + .IsFlowControl = 1, + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_ELSE, + .Name = "ELSE", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDIF, + .Name = "ENDIF", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BGNLOOP, + .Name = "BGNLOOP", + .IsFlowControl = 1, + 
.NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BRK, + .Name = "BRK", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDLOOP, + .Name = "ENDLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_CONT, + .Name = "CONT", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_REPL_ALPHA, + .Name = "REPL_ALPHA", + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_BEGIN_TEX, + .Name = "BEGIN_TEX" + }, + { + .Opcode = RC_OPCODE_KILP, + .Name = "KILP", + } +}; + +void rc_compute_sources_for_writemask( + const struct rc_instruction *inst, + unsigned int writemask, + unsigned int *srcmasks) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + srcmasks[0] = 0; + srcmasks[1] = 0; + srcmasks[2] = 0; + + if (opcode->Opcode == RC_OPCODE_KIL) + srcmasks[0] |= RC_MASK_XYZW; + else if (opcode->Opcode == RC_OPCODE_IF) + srcmasks[0] |= RC_MASK_X; + + if (!writemask) + return; + + if (opcode->IsComponentwise) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else if (opcode->IsStandardScalar) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= RC_MASK_X; + } else { + switch(opcode->Opcode) { + case RC_OPCODE_ARL: + srcmasks[0] |= RC_MASK_X; + break; + case RC_OPCODE_DP2: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + break; + case RC_OPCODE_DP3: + case RC_OPCODE_XPD: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + break; + case RC_OPCODE_DP4: + srcmasks[0] |= RC_MASK_XYZW; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_DPH: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: + case RC_OPCODE_TXL: + srcmasks[0] |= RC_MASK_W; + /* Fall through */ + case RC_OPCODE_TEX: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + break; + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + case 
RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_TXD: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_Y; + /* Fall through. */ + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + srcmasks[1] |= RC_MASK_X; + srcmasks[2] |= RC_MASK_X; + break; + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_Z; + /* Fall through. */ + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + srcmasks[2] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + srcmasks[2] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_DST: + srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; + srcmasks[1] |= RC_MASK_Y | RC_MASK_W; + break; + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_OPCODE_LIT: + srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; + break; + default: + break; + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h new file mode 100644 index 00000000000..b5868820611 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h @@ -0,0 +1,263 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_OPCODES_H +#define RADEON_OPCODES_H + +#include + +/** + * Opcodes understood by the Radeon compiler. + */ +typedef enum { + RC_OPCODE_NOP = 0, + RC_OPCODE_ILLEGAL_OPCODE, + + /** vec4 instruction: dst.c = abs(src0.c); */ + RC_OPCODE_ABS, + + /** vec4 instruction: dst.c = src0.c + src1.c; */ + RC_OPCODE_ADD, + + /** special instruction: load address register + * dst.x = floor(src.x), where dst must be an address register */ + RC_OPCODE_ARL, + + /** vec4 instruction: dst.c = ceil(src0.c) */ + RC_OPCODE_CEIL, + + /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ + RC_OPCODE_CLAMP, + + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ + RC_OPCODE_CMP, + + /** vec4 instruction: dst.c = src2.c > 0.5 ? 
src0.c : src1.c */ + RC_OPCODE_CND, + + /** scalar instruction: dst = cos(src0.x) */ + RC_OPCODE_COS, + + /** special instruction: take vec4 partial derivative in X direction + * dst.c = d src0.c / dx */ + RC_OPCODE_DDX, + + /** special instruction: take vec4 partial derivative in Y direction + * dst.c = d src0.c / dy */ + RC_OPCODE_DDY, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ + RC_OPCODE_DP2, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ + RC_OPCODE_DP3, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ + RC_OPCODE_DP4, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */ + RC_OPCODE_DPH, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_DST, + + /** scalar instruction: dst = 2**src0.x */ + RC_OPCODE_EX2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_EXP, + + /** vec4 instruction: dst.c = floor(src0.c) */ + RC_OPCODE_FLR, + + /** vec4 instruction: dst.c = src0.c - floor(src0.c) */ + RC_OPCODE_FRC, + + /** special instruction: stop execution if any component of src0 is negative */ + RC_OPCODE_KIL, + + /** scalar instruction: dst = log_2(src0.x) */ + RC_OPCODE_LG2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LIT, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LOG, + + /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */ + RC_OPCODE_LRP, + + /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ + RC_OPCODE_MAD, + + /** vec4 instruction: dst.c = max(src0.c, src1.c) */ + RC_OPCODE_MAX, + + /** vec4 instruction: dst.c = min(src0.c, src1.c) */ + RC_OPCODE_MIN, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_MOV, + + /** vec4 instruction: dst.c = src0.c*src1.c */ + RC_OPCODE_MUL, + + /** scalar instruction: dst = src0.x ** src1.x */ + RC_OPCODE_POW, + + /** scalar instruction: dst = 1 / src0.x */ + RC_OPCODE_RCP, + 
+ /** scalar instruction: dst = 1 / sqrt(src0.x) */ + RC_OPCODE_RSQ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_SCS, + + /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SEQ, + + /** vec4 instruction: dst.c = 0.0 */ + RC_OPCODE_SFL, + + /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGE, + + /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGT, + + /** scalar instruction: dst = sin(src0.x) */ + RC_OPCODE_SIN, + + /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLE, + + /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLT, + + /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SNE, + + /** vec4 instruction: dst.c = (src0.c < 0) ? -1 : ((src0.c > 0) ? 1 : 0) */ + RC_OPCODE_SSG, + + /** vec4 instruction: dst.c = src0.c - src1.c */ + RC_OPCODE_SUB, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_SWZ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_XPD, + + RC_OPCODE_TEX, + RC_OPCODE_TXB, + RC_OPCODE_TXD, + RC_OPCODE_TXL, + RC_OPCODE_TXP, + + /** branch instruction: + * If src0.x != 0.0, continue with the next instruction; + * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. + */ + RC_OPCODE_IF, + + /** branch instruction: jump to matching RC_OPCODE_ENDIF */ + RC_OPCODE_ELSE, + + /** branch instruction: has no effect */ + RC_OPCODE_ENDIF, + + RC_OPCODE_BGNLOOP, + + RC_OPCODE_BRK, + + RC_OPCODE_ENDLOOP, + + RC_OPCODE_CONT, + + /** special instruction, used in R300-R500 fragment program pair instructions + * indicates that the result of the alpha operation shall be replicated + * across all other channels */ + RC_OPCODE_REPL_ALPHA, + + /** special instruction, used in R300-R500 fragment programs + * to indicate the start of a block of texture instructions that + * can run simultaneously.
*/ + RC_OPCODE_BEGIN_TEX, + + /** Stop execution of the shader (GLSL discard) */ + RC_OPCODE_KILP, + + MAX_RC_OPCODE +} rc_opcode; + + +struct rc_opcode_info { + rc_opcode Opcode; + const char * Name; + + /** true if the instruction reads from a texture. + * + * \note This is false for the KIL instruction, even though KIL is + * a texture instruction from a hardware point of view. */ + unsigned int HasTexture:1; + + unsigned int NumSrcRegs:2; + unsigned int HasDstReg:1; + + /** true if this instruction affects control flow */ + unsigned int IsFlowControl:1; + + /** true if this is a vector instruction that operates on components in parallel + * without any cross-component interaction */ + unsigned int IsComponentwise:1; + + /** true if this instruction sources only its operands X components + * to compute one result which is smeared across all output channels */ + unsigned int IsStandardScalar:1; +}; + +extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; + +static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) +{ + assert((unsigned int)opcode < MAX_RC_OPCODE); + assert(rc_opcodes[opcode].Opcode == opcode); + + return &rc_opcodes[opcode]; +} + +struct rc_instruction; + +void rc_compute_sources_for_writemask( + const struct rc_instruction *inst, + unsigned int writemask, + unsigned int *srcmasks); + +#endif /* RADEON_OPCODES_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c new file mode 100644 index 00000000000..39dcb21d4f4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -0,0 +1,700 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_swizzle.h" + +struct src_clobbered_reads_cb_data { + rc_register_file File; + unsigned int Index; + unsigned int Mask; + struct rc_reader_data * ReaderData; +}; + +typedef void (*rc_presub_replace_fn)(struct rc_instruction *, + struct rc_instruction *, + unsigned int); + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; +} + +static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, + struct rc_src_register * src) +{ + rc_register_file file = src->File; + struct rc_reader_data * reader_data = data; + + if(!rc_inst_can_use_presub(inst, + reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), + src, + &reader_data->Writer->U.I.PreSub.SrcReg[0], + &reader_data->Writer->U.I.PreSub.SrcReg[1])) { + reader_data->Abort = 1; + return; + } + + /* XXX This could probably be handled better. */ + if (file == RC_FILE_ADDRESS) { + reader_data->Abort = 1; + return; + } + + /* These instructions cannot read from the constants file. 
+ * see radeonTransformTEX() + */ + if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && + (inst->U.I.Opcode == RC_OPCODE_TEX || + inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_TXD || + inst->U.I.Opcode == RC_OPCODE_TXL || + inst->U.I.Opcode == RC_OPCODE_KIL)){ + reader_data->Abort = 1; + return; + } +} + +static void src_clobbered_reads_cb( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct src_clobbered_reads_cb_data * sc_data = data; + + if (src->File == sc_data->File + && src->Index == sc_data->Index + && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { + + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } + + if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } +} + +static void is_src_clobbered_scan_write( + void * data, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct src_clobbered_reads_cb_data sc_data; + struct rc_reader_data * reader_data = data; + sc_data.File = file; + sc_data.Index = index; + sc_data.Mask = mask; + sc_data.ReaderData = reader_data; + rc_for_all_reads_src(reader_data->Writer, + src_clobbered_reads_cb, &sc_data); +} + +static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ + struct rc_reader_data reader_data; + unsigned int i; + + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst_mov->U.I.WriteALUResult || + inst_mov->U.I.SaturateMode) + return; + + /* Get a list of all the readers of this MOV instruction. */ + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_mov, &reader_data, + copy_propagate_scan_read, NULL, + is_src_clobbered_scan_write); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + return; + + /* Propagate the MOV instruction. 
*/ + for (i = 0; i < reader_data.ReaderCount; i++) { + struct rc_instruction * inst = reader_data.Readers[i].Inst; + *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); + + if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = inst_mov->U.I.PreSub; + } + + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); +} + +/** + * Check if a source register is actually always the same + * swizzle constant. + */ +static int is_src_uniform_constant(struct rc_src_register src, + rc_swizzle * pswz, unsigned int * pnegate) +{ + int have_used = 0; + + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; + + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz = 0; + return 0; + } + } + } + + return 1; +} + +static void constant_folding_mad(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate= 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MUL; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + 
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } +} + +static void constant_folding_mul(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate = 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } +} + +static void constant_folding_add(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate = 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + return; + } + } +} + +/** + * Replace 0.0, 1.0 and 0.5 immediate constants by their + * respective swizzles. 
Simplify instructions like ADD dst, src, 0;
+ */
+static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned int i;
+
+	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		struct rc_constant * constant;
+		struct rc_src_register newsrc;
+		int have_real_reference;
+		unsigned int chan;
+
+		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant.
+		 * Any channel whose swizzle value is <= 3 ends the scan early and
+		 * leaves the source file untouched. */
+		for (chan = 0; chan < 4; ++chan)
+			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
+				break;
+		if (chan == 4) {
+			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
+			continue;
+		}
+
+		/* Convert immediates to swizzles. Only a non-relative read of an
+		 * in-range constant whose type is RC_CONSTANT_IMMEDIATE is
+		 * rewritten; every other source is skipped. */
+		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
+		    inst->U.I.SrcReg[src].RelAddr ||
+		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
+			continue;
+
+		constant =
+			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
+
+		if (constant->Type != RC_CONSTANT_IMMEDIATE)
+			continue;
+
+		newsrc = inst->U.I.SrcReg[src];
+		have_real_reference = 0;
+		for (chan = 0; chan < 4; ++chan) {
+			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+			unsigned int newswz;
+			float imm;
+			float baseimm;
+
+			if (swz >= 4)
+				continue;
+
+			imm = constant->u.Immediate[swz];
+			baseimm = imm; /* magnitude of the immediate, sign handled below */
+			if (imm < 0.0)
+				baseimm = -baseimm;
+
+			if (baseimm == 0.0) {
+				newswz = RC_SWIZZLE_ZERO;
+			} else if (baseimm == 1.0) {
+				newswz = RC_SWIZZLE_ONE;
+			} else if (baseimm == 0.5 && c->has_half_swizzles) {
+				newswz = RC_SWIZZLE_HALF;
+			} else {
+				have_real_reference = 1; /* channel still needs the constant register */
+				continue;
+			}
+
+			SET_SWZ(newsrc.Swizzle, chan, newswz);
+			if (imm < 0.0 && !newsrc.Abs)
+				newsrc.Negate ^= 1 << chan; /* fold the immediate's sign into Negate */
+		}
+
+		if (!have_real_reference) {
+			newsrc.File = RC_FILE_NONE;
+			newsrc.Index = 0;
+		}
+
+		/* don't make the swizzle worse: keep the original source when the
+		 * rewritten one is not native for this opcode but the original is */
+		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
+		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+			continue;
+
+		inst->U.I.SrcReg[src] = newsrc;
+	}
+
+	/* Simplify instructions based on constants */
+	if (inst->U.I.Opcode == RC_OPCODE_MAD)
+		constant_folding_mad(inst);
+
+	/* note: MAD can simplify to MUL or ADD, so the opcode is re-tested
+	 * here instead of chaining with the MAD check above */
+	if (inst->U.I.Opcode == RC_OPCODE_MUL)
+		constant_folding_mul(inst);
+	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
+		constant_folding_add(inst);
+
+	/* In case this instruction has been converted, make sure all of the
+	 * registers that are no longer used are empty. */
+	opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	for(i = opcode->NumSrcRegs; i < 3; i++) {
+		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
+	}
+}
+
+/**
+ * If src and dst use the same register, this function returns a writemask that
+ * indicates which components are read by src.  Otherwise zero is returned.
+ */
+static unsigned int src_reads_dst_mask(struct rc_src_register src,
+						struct rc_dst_register dst)
+{
+	if (dst.File != src.File || dst.Index != src.Index) {
+		return 0;
+	}
+	return rc_swizzle_to_writemask(src.Swizzle);
+}
+
+/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
+ * in any of its channels.  Return 0 otherwise.
*/ +static int src_has_const_swz(struct rc_src_register src) { + int chan; + for(chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF + || swz == RC_SWIZZLE_ONE) { + return 1; + } + } + return 0; +} + +static void presub_scan_read( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = data; + rc_presubtract_op * presub_opcode = reader_data->CbData; + + if (!rc_inst_can_use_presub(inst, *presub_opcode, + reader_data->Writer->U.I.DstReg.WriteMask, + src, + &reader_data->Writer->U.I.SrcReg[0], + &reader_data->Writer->U.I.SrcReg[1])) { + reader_data->Abort = 1; + return; + } +} + +static int presub_helper( + struct radeon_compiler * c, + struct rc_instruction * inst_add, + rc_presubtract_op presub_opcode, + rc_presub_replace_fn presub_replace) +{ + struct rc_reader_data reader_data; + unsigned int i; + rc_presubtract_op cb_op = presub_opcode; + + reader_data.CbData = &cb_op; + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, + is_src_clobbered_scan_write); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + return 0; + + for(i = 0; i < reader_data.ReaderCount; i++) { + unsigned int src_index; + struct rc_reader reader = reader_data.Readers[i]; + const struct rc_opcode_info * info = + rc_get_opcode_info(reader.Inst->U.I.Opcode); + + for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) + presub_replace(inst_add, reader.Inst, src_index); + } + } + return 1; +} + +/* This function assumes that inst_add->U.I.SrcReg[0] and + * inst_add->U.I.SrcReg[1] aren't both negative. 
*/ +static void presub_replace_add( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + rc_presubtract_op presub_opcode; + if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) + presub_opcode = RC_PRESUB_SUB; + else + presub_opcode = RC_PRESUB_ADD; + + if (inst_add->U.I.SrcReg[1].Negate) { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; + } else { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; + } + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; + inst_reader->U.I.PreSub.Opcode = presub_opcode; + inst_reader->U.I.SrcReg[src_index] = + chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; +} + +static int is_presub_candidate( + struct radeon_compiler * c, + struct rc_instruction * inst) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + unsigned int is_constant[2] = {0, 0}; + + assert(inst->U.I.Opcode == RC_OPCODE_ADD); + + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE + || inst->U.I.SaturateMode + || inst->U.I.WriteALUResult) { + return 0; + } + + /* If both sources use a constant swizzle, then we can't convert it to + * a presubtract operation. In fact for the ADD and SUB presubtract + * operations neither source can contain a constant swizzle. This + * specific case is checked in peephole_add_presub_add() when + * we make sure the swizzles for both sources are equal, so we + * don't need to worry about it here. 
*/ + for (i = 0; i < 2; i++) { + int chan; + for (chan = 0; chan < 4; chan++) { + rc_swizzle swz = + get_swz(inst->U.I.SrcReg[i].Swizzle, chan); + if (swz == RC_SWIZZLE_ONE + || swz == RC_SWIZZLE_ZERO + || swz == RC_SWIZZLE_HALF) { + is_constant[i] = 1; + } + } + } + if (is_constant[0] && is_constant[1]) + return 0; + + for(i = 0; i < info->NumSrcRegs; i++) { + struct rc_src_register src = inst->U.I.SrcReg[i]; + if (src_reads_dst_mask(src, inst->U.I.DstReg)) + return 0; + + src.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) + return 0; + } + return 1; +} + +static int peephole_add_presub_add( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned dstmask = inst_add->U.I.DstReg.WriteMask; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; + + if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) + return 0; + + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; + + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; + + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; + + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) + return 0; + + if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +static void presub_replace_inv( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + /* We must be careful not to modify inst_add, since it + * is possible it will remain part of the program.*/ + 
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
+	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+						inst_reader->U.I.PreSub.SrcReg[0]);
+
+	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+}
+
+/**
+ * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
+ * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
+ * of the add instruction must have the constant 1 swizzle.  This function
+ * does not check const registers to see if their value is 1.0, so it should
+ * be called after the constant_folding optimization.
+ * @return
+ * 	0 if the ADD instruction is still part of the program.
+ * 	1 if the ADD instruction is no longer part of the program.
+ */
+static int peephole_add_presub_inv(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_add)
+{
+	unsigned int i, swz;
+
+	if (!is_presub_candidate(c, inst_add))
+		return 0;
+
+	/* Check if src0 is 1 on every channel selected by the write mask. */
+	/* XXX It would be nice to use is_src_uniform_constant here, but that
+	 * function only works if the register's file is RC_FILE_NONE */
+	for(i = 0; i < 4; i++ ) {
+		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
+		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
+						&& swz != RC_SWIZZLE_ONE) {
+			return 0;
+		}
+	}
+
+	/* Check src1: it must be negated on every written channel, carry no
+	 * absolute-value modifier, come from the temporary or constant file
+	 * and use no constant swizzle. */
+	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
+						inst_add->U.I.DstReg.WriteMask
+		|| inst_add->U.I.SrcReg[1].Abs
+		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
+			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
+		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
+
+		return 0;
+	}
+
+	/* The ADD is only removed once presub_helper reports that the
+	 * readers were converted. */
+	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
+		rc_remove_instruction(inst_add);
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * @return
+ * 	0 if inst is still part of the program.
+ * 1 if inst is no longer part of the program. + */ +static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) +{ + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + if (c->has_presub) { + if(peephole_add_presub_inv(c, inst)) + return 1; + if(peephole_add_presub_add(c, inst)) + return 1; + } + break; + default: + break; + } + return 0; +} + +void rc_optimize(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + + constant_folding(c, cur); + + if(peephole(c, cur)) + continue; + + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + copy_propagate(c, cur); + /* cur may no longer be part of the program */ + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c new file mode 100644 index 00000000000..1e9a2c09d44 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c @@ -0,0 +1,62 @@ + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_opcodes.h" +#include "radeon_program_pair.h" + +static void mark_used_presub(struct rc_pair_sub_instruction * sub) +{ + if (sub->Src[RC_PAIR_PRESUB_SRC].Used) { + unsigned int presub_reg_count = rc_presubtract_src_reg_count( + sub->Src[RC_PAIR_PRESUB_SRC].Index); + unsigned int i; + for (i = 0; i < presub_reg_count; i++) { + sub->Src[i].Used = 1; + } + } +} + +static void mark_used( + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub) +{ + unsigned int i; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + if (src_type & RC_SOURCE_RGB) { + inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1; + } + + if (src_type & RC_SOURCE_ALPHA) { + inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1; 
+ } + } +} + +/** + * This pass finds sources that are not used by their instruction and marks + * them as unused. + */ +void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + unsigned int i; + if (inst->Type == RC_INSTRUCTION_NORMAL) + continue; + + /* Mark all sources as unused */ + for (i = 0; i < 4; i++) { + inst->U.P.RGB.Src[i].Used = 0; + inst->U.P.Alpha.Src[i].Used = 0; + } + mark_used(inst, &inst->U.P.RGB); + mark_used(inst, &inst->U.P.Alpha); + + mark_used_presub(&inst->U.P.RGB); + mark_used_presub(&inst->U.P.Alpha); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c new file mode 100644 index 00000000000..49983d6ce75 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c @@ -0,0 +1,706 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2011 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include + +#include "main/glheader.h" +#include "program/register_allocate.h" +#include "ralloc.h" + +#include "r300_fragprog_swizzle.h" +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_variable.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + + +struct register_info { + struct live_intervals Live[4]; + + unsigned int Used:1; + unsigned int Allocated:1; + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; + unsigned int Writemask; +}; + +struct regalloc_state { + struct radeon_compiler * C; + + struct register_info * Input; + unsigned int NumInputs; + + struct register_info * Temporary; + unsigned int NumTemporaries; + + unsigned int Simple; + int LoopEnd; +}; + +enum rc_reg_class { + RC_REG_CLASS_SINGLE, + RC_REG_CLASS_DOUBLE, + RC_REG_CLASS_TRIPLE, + RC_REG_CLASS_ALPHA, + RC_REG_CLASS_SINGLE_PLUS_ALPHA, + RC_REG_CLASS_DOUBLE_PLUS_ALPHA, + RC_REG_CLASS_TRIPLE_PLUS_ALPHA, + RC_REG_CLASS_X, + RC_REG_CLASS_Y, + RC_REG_CLASS_Z, + RC_REG_CLASS_XY, + RC_REG_CLASS_YZ, + RC_REG_CLASS_XZ, + RC_REG_CLASS_XW, + RC_REG_CLASS_YW, + RC_REG_CLASS_ZW, + RC_REG_CLASS_XYW, + RC_REG_CLASS_YZW, + RC_REG_CLASS_XZW, + RC_REG_CLASS_COUNT +}; + +struct rc_class { + enum rc_reg_class Class; + + unsigned int WritemaskCount; + + /** This is 1 if this class is being used by the register allocator + * and 0 otherwise */ + unsigned int Used; + + /** This is the ID number assigned to this class by ra. 
*/ + unsigned int Id; + + /** List of writemasks that belong to this class */ + unsigned int Writemasks[3]; + + +}; + +static void print_live_intervals(struct live_intervals * src) +{ + if (!src || !src->Used) { + DBG("(null)"); + return; + } + + DBG("(%i,%i)", src->Start, src->End); +} + +static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) +{ + if (VERBOSE) { + DBG("overlap_live_intervals: "); + print_live_intervals(a); + DBG(" to "); + print_live_intervals(b); + DBG("\n"); + } + + if (!a->Used || !b->Used) { + DBG(" unused interval\n"); + return 0; + } + + if (a->Start > b->Start) { + if (a->Start < b->End) { + DBG(" overlap\n"); + return 1; + } + } else if (b->Start > a->Start) { + if (b->Start < a->End) { + DBG(" overlap\n"); + return 1; + } + } else { /* a->Start == b->Start */ + if (a->Start != a->End && b->Start != b->End) { + DBG(" overlap\n"); + return 1; + } + } + + DBG(" no overlap\n"); + + return 0; +} + +static void scan_read_callback(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct regalloc_state * s = data; + struct register_info * reg; + unsigned int i; + + if (file != RC_FILE_INPUT) + return; + + s->Input[index].Used = 1; + reg = &s->Input[index]; + + for (i = 0; i < 4; i++) { + if (!((mask >> i) & 0x1)) { + continue; + } + reg->Live[i].Used = 1; + reg->Live[i].Start = 0; + reg->Live[i].End = + s->LoopEnd > inst->IP ? 
s->LoopEnd : inst->IP; + } +} + +static void remap_register(void * data, struct rc_instruction * inst, + rc_register_file * file, unsigned int * index) +{ + struct regalloc_state * s = data; + const struct register_info * reg; + + if (*file == RC_FILE_TEMPORARY && s->Simple) + reg = &s->Temporary[*index]; + else if (*file == RC_FILE_INPUT) + reg = &s->Input[*index]; + else + return; + + if (reg->Allocated) { + *index = reg->Index; + } +} + +static void alloc_input_simple(void * data, unsigned int input, + unsigned int hwreg) +{ + struct regalloc_state * s = data; + + if (input >= s->NumInputs) + return; + + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; +} + +/* This functions offsets the temporary register indices by the number + * of input registers, because input registers are actually temporaries and + * should not occupy the same space. + * + * This pass is supposed to be used to maintain correct allocation of inputs + * if the standard register allocation is disabled. 
*/ +static void do_regalloc_inputs_only(struct regalloc_state * s) +{ + for (unsigned i = 0; i < s->NumTemporaries; i++) { + s->Temporary[i].Allocated = 1; + s->Temporary[i].File = RC_FILE_TEMPORARY; + s->Temporary[i].Index = i + s->NumInputs; + } +} + +static unsigned int is_derivative(rc_opcode op) +{ + return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); +} + +static int find_class( + struct rc_class * classes, + unsigned int writemask, + unsigned int max_writemask_count) +{ + unsigned int i; + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + unsigned int j; + if (classes[i].WritemaskCount > max_writemask_count) { + continue; + } + for (j = 0; j < 3; j++) { + if (classes[i].Writemasks[j] == writemask) { + return i; + } + } + } + return -1; +} + +static enum rc_reg_class variable_get_class( + struct rc_variable * variable, + struct rc_class * classes) +{ + unsigned int i; + unsigned int can_change_writemask= 1; + unsigned int writemask = rc_variable_writemask_sum(variable); + struct rc_list * readers = rc_variable_readers_union(variable); + int class_index; + + if (!variable->C->is_r500) { + struct rc_class c; + /* The assumption here is that if an instruction has type + * RC_INSTRUCTION_NORMAL then it is a TEX instruction. + * r300 and r400 can't swizzle the result of a TEX lookup. */ + if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = RC_MASK_XYZW; + } + + /* Check if it is possible to do swizzle packing for r300/r400 + * without creating non-native swizzles. 
*/ + class_index = find_class(classes, writemask, 3); + if (class_index < 0) { + goto error; + } + c = classes[class_index]; + for (i = 0; i < c.WritemaskCount; i++) { + int j; + unsigned int conversion_swizzle = + rc_make_conversion_swizzle( + writemask, c.Writemasks[i]); + for (j = 0; j < variable->ReaderCount; j++) { + unsigned int old_swizzle; + unsigned int new_swizzle; + struct rc_reader r = variable->Readers[j]; + if (r.Inst->Type == RC_INSTRUCTION_PAIR ) { + old_swizzle = r.U.P.Arg->Swizzle; + } else { + old_swizzle = r.U.I.Src->Swizzle; + } + new_swizzle = rc_adjust_channels( + old_swizzle, conversion_swizzle); + if (!r300_swizzle_is_native_basic(new_swizzle)) { + can_change_writemask = 0; + break; + } + } + if (!can_change_writemask) { + break; + } + } + } + + if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { + /* DDX/DDY seem to always fail when their writemasks are + * changed.*/ + if (is_derivative(variable->Inst->U.P.RGB.Opcode) + || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + } + } + for ( ; readers; readers = readers->Next) { + struct rc_reader * r = readers->Item; + if (r->Inst->Type == RC_INSTRUCTION_PAIR) { + if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { + can_change_writemask = 0; + break; + } + /* DDX/DDY also fail when their swizzles are changed. */ + if (is_derivative(r->Inst->U.P.RGB.Opcode) + || is_derivative(r->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + break; + } + } + } + + class_index = find_class(classes, writemask, + can_change_writemask ? 
3 : 1); + if (class_index > -1) { + return classes[class_index].Class; + } else { +error: + rc_error(variable->C, + "Could not find class for index=%u mask=%u\n", + variable->Dst.Index, writemask); + return 0; + } +} + +static unsigned int overlap_live_intervals_array( + struct live_intervals * a, + struct live_intervals * b) +{ + unsigned int a_chan, b_chan; + for (a_chan = 0; a_chan < 4; a_chan++) { + for (b_chan = 0; b_chan < 4; b_chan++) { + if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { + return 1; + } + } + } + return 0; +} + +static unsigned int reg_get_index(int reg) +{ + return reg / RC_MASK_XYZW; +} + +static unsigned int reg_get_writemask(int reg) +{ + return (reg % RC_MASK_XYZW) + 1; +} + +static int get_reg_id(unsigned int index, unsigned int writemask) +{ + assert(writemask); + if (writemask == 0) { + return 0; + } + return (index * RC_MASK_XYZW) + (writemask - 1); +} + +#if VERBOSE +static void print_reg(int reg) +{ + unsigned int index = reg_get_index(reg); + unsigned int mask = reg_get_writemask(reg); + fprintf(stderr, "Temp[%u].%c%c%c%c", index, + mask & RC_MASK_X ? 'x' : '_', + mask & RC_MASK_Y ? 'y' : '_', + mask & RC_MASK_Z ? 'z' : '_', + mask & RC_MASK_W ? 
'w' : '_'); +} +#endif + +static void add_register_conflicts( + struct ra_regs * regs, + unsigned int max_temp_regs) +{ + unsigned int index, a_mask, b_mask; + for (index = 0; index < max_temp_regs; index++) { + for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { + for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; + b_mask++) { + if (a_mask & b_mask) { + ra_add_reg_conflict(regs, + get_reg_id(index, a_mask), + get_reg_id(index, b_mask)); + } + } + } + } +} + +static void do_advanced_regalloc(struct regalloc_state * s) +{ + struct rc_class rc_class_list [] = { + {RC_REG_CLASS_SINGLE, 3, 0, 0, + {RC_MASK_X, + RC_MASK_Y, + RC_MASK_Z}}, + {RC_REG_CLASS_DOUBLE, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_X | RC_MASK_Z, + RC_MASK_Y | RC_MASK_Z}}, + {RC_REG_CLASS_TRIPLE, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ALPHA, 1, 0, 0, + {RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_Y | RC_MASK_W, + RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_X, 1, 0, 0, + {RC_MASK_X, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Y, 1, 0, 0, + {RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Z, 1, 0, 0, + {RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XY, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZ, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZ, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XW, 1, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, 
+ RC_MASK_NONE}}, + {RC_REG_CLASS_ZW, 1, 0, 0, + {RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XYW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}} + }; + + unsigned int i, j, index, input_node, node_count, node_index; + unsigned int * node_classes; + unsigned int * input_classes; + struct rc_instruction * inst; + struct rc_list * var_ptr; + struct rc_list * variables; + struct ra_regs * regs; + struct ra_graph * graph; + + /* Allocate the main ra data structure */ + regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW); + + /* Get list of program variables */ + variables = rc_get_variables(s->C); + node_count = rc_list_count(variables); + node_classes = memory_pool_malloc(&s->C->Pool, + node_count * sizeof(unsigned int)); + input_classes = memory_pool_malloc(&s->C->Pool, + s->NumInputs * sizeof(unsigned int)); + + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + unsigned int class_index; + /* Compute the live intervals */ + rc_variable_compute_live_intervals(var_ptr->Item); + + class_index = variable_get_class(var_ptr->Item, rc_class_list); + + /* If we haven't used this register class yet, mark it + * as used and allocate space for it. 
*/ + if (!rc_class_list[class_index].Used) { + rc_class_list[class_index].Used = 1; + rc_class_list[class_index].Id = ra_alloc_reg_class(regs); + } + + node_classes[node_index] = rc_class_list[class_index].Id; + } + + + /* Assign registers to the classes */ + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + struct rc_class class = rc_class_list[i]; + if (!class.Used) { + continue; + } + + for (index = 0; index < s->C->max_temp_regs; index++) { + for (j = 0; j < class.WritemaskCount; j++) { + int reg_id = get_reg_id(index, + class.Writemasks[j]); + ra_class_add_reg(regs, class.Id, reg_id); + } + } + } + + /* Add register conflicts */ + add_register_conflicts(regs, s->C->max_temp_regs); + + /* Calculate live intervals for input registers */ + for (inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction * endloop = + rc_match_bgnloop(inst); + if (endloop->IP > s->LoopEnd) { + s->LoopEnd = endloop->IP; + } + } + rc_for_all_reads_mask(inst, scan_read_callback, s); + } + + /* Create classes for input registers */ + for (i = 0; i < s->NumInputs; i++) { + unsigned int chan, class_id, writemask = 0; + for (chan = 0; chan < 4; chan++) { + if (s->Input[i].Live[chan].Used) { + writemask |= (1 << chan); + } + } + s->Input[i].Writemask = writemask; + if (!writemask) { + continue; + } + + class_id = ra_alloc_reg_class(regs); + input_classes[i] = class_id; + ra_class_add_reg(regs, class_id, + get_reg_id(s->Input[i].Index, writemask)); + } + + ra_set_finalize(regs); + + graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); + + /* Build the interference graph */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next,node_index++) { + struct rc_list * a, * b; + unsigned int b_index; + + ra_set_node_class(graph, node_index, node_classes[node_index]); + + for (a = var_ptr, b = var_ptr->Next, b_index = 
node_index + 1; + b; b = b->Next, b_index++) { + struct rc_variable * var_a = a->Item; + while (var_a) { + struct rc_variable * var_b = b->Item; + while (var_b) { + if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { + ra_add_node_interference(graph, + node_index, b_index); + } + var_b = var_b->Friend; + } + var_a = var_a->Friend; + } + } + } + + /* Add input registers to the interference graph */ + for (i = 0, input_node = 0; i< s->NumInputs; i++) { + if (!s->Input[i].Writemask) { + continue; + } + ra_set_node_class(graph, node_count + input_node, + input_classes[i]); + for (var_ptr = variables, node_index = 0; + var_ptr; var_ptr = var_ptr->Next, node_index++) { + struct rc_variable * var = var_ptr->Item; + if (overlap_live_intervals_array(s->Input[i].Live, + var->Live)) { + ra_add_node_interference(graph, node_index, + node_count + input_node); + } + } + /* Manually allocate a register for this input */ + ra_set_node_reg(graph, node_count + input_node, get_reg_id( + s->Input[i].Index, s->Input[i].Writemask)); + input_node++; + } + + if (!ra_allocate_no_spills(graph)) { + rc_error(s->C, "Ran out of hardware temporaries\n"); + return; + } + + /* Rewrite the registers */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + int reg = ra_get_node_reg(graph, node_index); + unsigned int writemask = reg_get_writemask(reg); + unsigned int index = reg_get_index(reg); + struct rc_variable * var = var_ptr->Item; + + if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = rc_variable_writemask_sum(var); + } + + if (var->Dst.File == RC_FILE_INPUT) { + continue; + } + rc_variable_change_dst(var, index, writemask); + } + + ralloc_free(graph); + ralloc_free(regs); +} + +/** + * @param user This parameter should be a pointer to an integer value. 
If this + * integer value is zero, then a simple register allocator will be used that + * only allocates space for input registers (\sa do_regalloc_inputs_only). If + * user is non-zero, then the regular register allocator will be used + * (\sa do_regalloc). + */ +void rc_pair_regalloc(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = + (struct r300_fragment_program_compiler*)cc; + struct regalloc_state s; + int * do_full_regalloc = (int*)user; + + memset(&s, 0, sizeof(s)); + s.C = cc; + s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; + s.Input = memory_pool_malloc(&cc->Pool, + s.NumInputs * sizeof(struct register_info)); + memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); + + s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; + s.Temporary = memory_pool_malloc(&cc->Pool, + s.NumTemporaries * sizeof(struct register_info)); + memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); + + rc_recompute_ips(s.C); + + c->AllocateHwInputs(c, &alloc_input_simple, &s); + if (*do_full_regalloc) { + do_advanced_regalloc(&s); + } else { + s.Simple = 1; + do_regalloc_inputs_only(&s); + } + + /* Rewrite inputs and if we are doing the simple allocation, rewrite + * temporaries too. */ + for (struct rc_instruction *inst = s.C->Program.Instructions.Next; + inst != &s.C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_register, &s); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c new file mode 100644 index 00000000000..25cd52c9cd4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c @@ -0,0 +1,1010 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct schedule_instruction { + struct rc_instruction * Instruction; + + /** Next instruction in the linked list of ready instructions. */ + struct schedule_instruction *NextReady; + + /** Values that this instruction reads and writes */ + struct reg_value * WriteValues[4]; + struct reg_value * ReadValues[12]; + unsigned int NumWriteValues:3; + unsigned int NumReadValues:4; + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. 
+ */ + unsigned int NumDependencies:5; + + /** List of all readers (see rc_get_readers() for the definition of + * "all readers"), even those outside the basic block this instruction + * lives in. */ + struct rc_reader_data GlobalReaders; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + struct schedule_instruction *Reader; + struct reg_value_reader *Next; +}; + +/** + * Used to keep track of which values are stored in each component of a + * RC_FILE_TEMPORARY. + */ +struct reg_value { + struct schedule_instruction * Writer; + + /** + * Unordered linked list of instructions that read from this value. + * When this value becomes available, we decrease all readers' + * dependency count. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is decremented each time + * a reader of the value is committed. + * When the reader count reaches zero, the dependency count + * of the instruction writing \ref Next is decremented. + */ + unsigned int NumReaders; + + struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ +}; + +/** + * The value currently tracked for each of the four channels of one + * RC_FILE_TEMPORARY register. + */ +struct register_state { + struct reg_value * Values[4]; +}; + +struct remap_reg { + struct rc_instruction * Inst; + unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int OldSwizzle:3; + unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int NewSwizzle:3; + unsigned int OnlyTexReads:1; + struct remap_reg * Next; +}; + +struct schedule_state { + struct radeon_compiler * C; + struct schedule_instruction * Current; + + struct register_state Temporary[RC_REGISTER_MAX_INDEX]; + + /** + * Linked lists of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. 
+ */ + /*@{*/ + struct schedule_instruction *ReadyFullALU; + struct schedule_instruction *ReadyRGB; + struct schedule_instruction *ReadyAlpha; + struct schedule_instruction *ReadyTEX; + /*@}*/ +}; + +static struct reg_value ** get_reg_valuep(struct schedule_state * s, + rc_register_file file, unsigned int index, unsigned int chan) +{ + if (file != RC_FILE_TEMPORARY) + return 0; + + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->Temporary[index].Values[chan]; +} + +static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) +{ + inst->NextReady = *list; + *list = inst; +} + +static void add_inst_to_list_end(struct schedule_instruction ** list, + struct schedule_instruction * inst) +{ + if(!*list){ + *list = inst; + }else{ + struct schedule_instruction * temp = *list; + while(temp->NextReady){ + temp = temp->NextReady; + } + temp->NextReady = inst; + } +} + +static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i is now ready\n", sinst->Instruction->IP); + + /* Adding Ready TEX instructions to the end of the "Ready List" helps + * us emit TEX instructions in blocks without losing our place. 
*/ + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) + add_inst_to_list_end(&s->ReadyTEX, sinst); + else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyRGB, sinst); + else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyAlpha, sinst); + else + add_inst_to_list(&s->ReadyFullALU, sinst); +} + +static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) +{ + assert(sinst->NumDependencies > 0); + sinst->NumDependencies--; + if (!sinst->NumDependencies) + instruction_ready(s, sinst); +} + +/** + * This function decreases the dependencies of the next instruction that + * wants to write to each of sinst's read values. + */ +static void commit_update_reads(struct schedule_state * s, + struct schedule_instruction * sinst){ + unsigned int i; + for(i = 0; i < sinst->NumReadValues; ++i) { + struct reg_value * v = sinst->ReadValues[i]; + assert(v->NumReaders > 0); + v->NumReaders--; + if (!v->NumReaders) { + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +static void commit_update_writes(struct schedule_state * s, + struct schedule_instruction * sinst){ + unsigned int i; + for(i = 0; i < sinst->NumWriteValues; ++i) { + struct reg_value * v = sinst->WriteValues[i]; + if (v->NumReaders) { + for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { + decrease_dependencies(s, r->Reader); + } + } else { + /* This happens in instruction sequences of the type + * OP r.x, ...; + * OP r.x, r.x, ...; + * See also the subtlety in how instructions that both + * read and write the same register are scanned. 
+ */ + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i: commit\n", sinst->Instruction->IP); + + commit_update_reads(s, sinst); + + commit_update_writes(s, sinst); +} + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + */ +static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) +{ + struct schedule_instruction *readytex; + struct rc_instruction * inst_begin; + + assert(s->ReadyTEX); + + /* Node marker for R300 */ + inst_begin = rc_insert_new_instruction(s->C, before->Prev); + inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; + + /* Link texture instructions back in */ + readytex = s->ReadyTEX; + while(readytex) { + rc_insert_instruction(before->Prev, readytex->Instruction); + DBG("%i: commit TEX reads\n", readytex->Instruction->IP); + + /* All of the TEX instructions in the same TEX block have + * their source registers read from before any of the + * instructions in that block write to their destination + * registers. This means that when we commit a TEX + * instruction, any other TEX instruction that wants to write + * to one of the committed instruction's source register can be + * marked as ready and should be emitted in the same TEX + * block. 
This prevents the following sequence from being + * emitted in two different TEX blocks: + * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; + * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; + */ + commit_update_reads(s, readytex); + readytex = readytex->NextReady; + } + readytex = s->ReadyTEX; + s->ReadyTEX = 0; /* detach the list first: TEX instructions made ready by the writes below are queued on a fresh list */ + while(readytex){ + DBG("%i: commit TEX writes\n", readytex->Instruction->IP); + commit_update_writes(s, readytex); + readytex = readytex->NextReady; + } +} + +/* This is a helper function for destructive_merge_instructions(). It helps + * merge presubtract sources from two instructions and makes sure the + * presubtract sources end up in the correct spot. This function assumes that + * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb) + * but no scalar instruction (alpha). + * @param dst_full Instruction that receives the presubtract sources; its + * Alpha opcode must be RC_OPCODE_NOP. + * @param src Sub-instruction whose presubtract sources are merged in. + * @param type RC_SOURCE_RGB or RC_SOURCE_ALPHA; selects whether the RGB or + * the Alpha source slots of dst_full are filled. + * @return 0 if merging the presubtract sources fails. + * @return 1 if merging the presubtract sources succeeds. + */ +static int merge_presub_sources( + struct rc_pair_instruction * dst_full, + struct rc_pair_sub_instruction src, + unsigned int type) +{ + unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; + struct rc_pair_sub_instruction * dst_sub; + const struct rc_opcode_info * info; + + assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); + + switch(type) { + case RC_SOURCE_RGB: + is_rgb = 1; + is_alpha = 0; + dst_sub = &dst_full->RGB; + break; + case RC_SOURCE_ALPHA: + is_rgb = 0; + is_alpha = 1; + dst_sub = &dst_full->Alpha; + break; + default: + assert(0); + return 0; + } + + info = rc_get_opcode_info(dst_full->RGB.Opcode); + + if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) + return 0; + + srcp_regs = rc_presubtract_src_reg_count( + src.Src[RC_PAIR_PRESUB_SRC].Index); + for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { + unsigned int arg; + int free_source; + unsigned int one_way = 0; + struct rc_pair_instruction_source srcp = src.Src[srcp_src]; + struct rc_pair_instruction_source temp; + + free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, + 
srcp.File, srcp.Index); + + /* If free_source < 0 then there are no free source + * slots. */ + if (free_source < 0) + return 0; + + temp = dst_sub->Src[srcp_src]; + dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; + + /* srcp needs src0 and src1 to be the same */ + if (free_source < srcp_src) { + if (!temp.Used) + continue; + free_source = rc_pair_alloc_source(dst_full, is_rgb, + is_alpha, temp.File, temp.Index); + if (free_source < 0) + return 0; + one_way = 1; + } else { + dst_sub->Src[free_source] = temp; + } + + /* If free_source == srcp_src, then the presubtract + * source is already in the correct place. */ + if (free_source == srcp_src) + continue; + + /* Shuffle the sources, so we can put the + * presubtract source in the correct place. */ + for(arg = 0; arg < info->NumSrcRegs; arg++) { + /*If this arg does not read from an rgb source, + * do nothing. */ + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) + & type)) { + continue; + } + + if (dst_full->RGB.Arg[arg].Source == srcp_src) + dst_full->RGB.Arg[arg].Source = free_source; + /* We need to do this just in case register + * is one of the sources already, but in the + * wrong spot. */ + else if(dst_full->RGB.Arg[arg].Source == free_source + && !one_way) { + dst_full->RGB.Arg[arg].Source = srcp_src; + } + } + } + return 1; +} + + +/* This function assumes that rgb.Alpha and alpha.RGB are unused */ +static int destructive_merge_instructions( + struct rc_pair_instruction * rgb, + struct rc_pair_instruction * alpha) +{ + const struct rc_opcode_info * opcode; + + assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); + assert(alpha->RGB.Opcode == RC_OPCODE_NOP); + + /* Presubtract registers need to be merged first so that registers + * needed by the presubtract operation can be placed in src0 and/or + * src1. */ + + /* Merge the rgb presubtract registers. 
*/ + if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { + return 0; + } + } + /* Merge the alpha presubtract registers */ + if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ + return 0; + } + } + + /* Copy alpha args into rgb */ + opcode = rc_get_opcode_info(alpha->Alpha.Opcode); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; + rc_register_file file = 0; + unsigned int index = 0; + int source; + + if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { + srcrgb = 1; + file = alpha->RGB.Src[oldsrc].File; + index = alpha->RGB.Src[oldsrc].Index; + } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { + srcalpha = 1; + file = alpha->Alpha.Src[oldsrc].File; + index = alpha->Alpha.Src[oldsrc].Index; + } + + source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); + if (source < 0) + return 0; + + rgb->Alpha.Arg[arg].Source = source; + rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; + rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; + rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; + } + + /* Copy alpha opcode into rgb */ + rgb->Alpha.Opcode = alpha->Alpha.Opcode; + rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; + rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; + rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; + rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; + rgb->Alpha.Saturate = alpha->Alpha.Saturate; + + /* Merge ALU result writing */ + if (alpha->WriteALUResult) { + if (rgb->WriteALUResult) + return 0; + + rgb->WriteALUResult = alpha->WriteALUResult; + rgb->ALUResultCompare = alpha->ALUResultCompare; + } + + return 1; +} + +/** + * Try to merge the given instructions into the rgb instructions. 
+ * + * Return true on success; on failure, return false, and keep + * the instructions untouched. + */ +static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) +{ + struct rc_pair_instruction backup; + + /*Instructions can't write output registers and ALU result at the + * same time. */ + if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) + || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { + return 0; + } + memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); + + if (destructive_merge_instructions(rgb, alpha)) + return 1; + + memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); + return 0; +} + +/** + * Flag the instruction before \p emitted with Nop when \p emitted reads a + * presubtract source from a temporary that the previous pair instruction + * writes. + * + * prev_rgb_index / prev_alpha_index hold the temporary index written by the + * previous instruction's RGB / Alpha half, or -1 when that half writes + * nothing, so that it can never match a source index. + */ +static void presub_nop(struct rc_instruction * emitted) { + int prev_rgb_index, prev_alpha_index, i, num_src; + + /* We don't need a nop if the previous instruction is a TEX. */ + if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { + return; + } + if (emitted->Prev->U.P.RGB.WriteMask) + prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; + else + prev_rgb_index = -1; + if (emitted->Prev->U.P.Alpha.WriteMask) + prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; + else + prev_alpha_index = -1; + + /* Check the previous rgb instruction */ + if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + num_src = rc_presubtract_src_reg_count( + emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.RGB.Src[i].Index; + if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY + && (index == prev_rgb_index + || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } + } + + /* Check the previous alpha instruction. 
*/ + if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + return; + + num_src = rc_presubtract_src_reg_count( + emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.Alpha.Src[i].Index; + if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY + && (index == prev_rgb_index || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } +} + +static void rgb_to_alpha_remap ( + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + rc_register_file old_file, + rc_swizzle old_swz, + unsigned int new_index) +{ + int new_src_index; + unsigned int i; + + for (i = 0; i < 3; i++) { + if (get_swz(arg->Swizzle, i) == old_swz) { + SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); + } + } + new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, + old_file, new_index); + /* This conversion is not possible, we must have made a mistake in + * is_rgb_to_alpha_possible. */ + if (new_src_index < 0) { + assert(0); + return; + } + + arg->Source = new_src_index; +} + +static int can_remap(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +static int can_convert_opcode_to_alpha(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DPH: + return 0; + default: + return 1; + } +} + +static void is_rgb_to_alpha_possible( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int chan_count = 0; + unsigned int alpha_sources = 0; + unsigned int i; + struct rc_reader_data * reader_data = userdata; + + if (!can_remap(inst->U.P.RGB.Opcode) + || !can_remap(inst->U.P.Alpha.Opcode)) { + reader_data->Abort = 1; + return; + } + + if (!src) + return; + + /* XXX There are some cases where we can still do the conversion if + * 
a reader reads from a presubtract source, but for now we'll prevent + * it. */ + if (arg->Source == RC_PAIR_PRESUB_SRC) { + reader_data->Abort = 1; + return; + } + + /* Make sure the source only reads from one component. + * XXX We should allow the source to read from the same component twice. + * XXX If the index we will be converting to is the same as the + * current index, then it is OK to read from more than one component. + */ + for (i = 0; i < 3; i++) { + rc_swizzle swz = get_swz(arg->Swizzle, i); + switch(swz) { + case RC_SWIZZLE_X: + case RC_SWIZZLE_Y: + case RC_SWIZZLE_Z: + case RC_SWIZZLE_W: + chan_count++; + break; + default: + break; + } + } + if (chan_count > 1) { + reader_data->Abort = 1; + return; + } + + /* Make sure there are enough alpha sources. + * XXX If we know what register all the readers are going + * to be remapped to, then in some situations we can still do + * the substitution, even if all 3 alpha sources are being used. */ + for (i = 0; i < 3; i++) { + if (inst->U.P.Alpha.Src[i].Used) { + alpha_sources++; + } + } + if (alpha_sources > 2) { + reader_data->Abort = 1; + return; + } +} + +static int convert_rgb_to_alpha( + struct schedule_state * s, + struct schedule_instruction * sched_inst) +{ + struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; + unsigned int old_mask = pair_inst->RGB.WriteMask; + unsigned int old_swz = rc_mask_to_swizzle(old_mask); + const struct rc_opcode_info * info = + rc_get_opcode_info(pair_inst->RGB.Opcode); + int new_index = -1; + unsigned int i; + + if (sched_inst->GlobalReaders.Abort) + return 0; + + if (!pair_inst->RGB.WriteMask) + return 0; + + if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) + || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { + return 0; + } + + assert(sched_inst->NumWriteValues == 1); + + if (!sched_inst->WriteValues[0]) { + assert(0); + return 0; + } + + /* We start at the old index, because if we can reuse the same + * register and just change the 
swizzle then it is more likely we + * will be able to convert all the readers. */ + for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { + struct reg_value ** new_regvalp = get_reg_valuep( + s, RC_FILE_TEMPORARY, i, 3); + if (!*new_regvalp) { + struct reg_value ** old_regvalp = + get_reg_valuep(s, + RC_FILE_TEMPORARY, + pair_inst->RGB.DestIndex, + rc_mask_to_swizzle(old_mask)); + new_index = i; + *new_regvalp = *old_regvalp; + *old_regvalp = NULL; + new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); + break; + } + } + if (new_index < 0) { + return 0; + } + + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + pair_inst->Alpha.DestIndex = new_index; + pair_inst->Alpha.WriteMask = RC_MASK_W; + pair_inst->Alpha.Target = pair_inst->RGB.Target; + pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; + pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; + pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, + sizeof(pair_inst->Alpha.Arg)); + /* Move the swizzles into the first chan */ + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int j; + for (j = 0; j < 3; j++) { + unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); + if (swz != RC_SWIZZLE_UNUSED) { + pair_inst->Alpha.Arg[i].Swizzle = + rc_init_swizzle(swz, 1); + break; + } + } + } + pair_inst->RGB.Opcode = RC_OPCODE_NOP; + pair_inst->RGB.DestIndex = 0; + pair_inst->RGB.WriteMask = 0; + pair_inst->RGB.Target = 0; + pair_inst->RGB.OutputWriteMask = 0; + pair_inst->RGB.DepthWriteMask = 0; + pair_inst->RGB.Saturate = 0; + memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); + + for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg, + RC_FILE_TEMPORARY, old_swz, new_index); + } + return 1; +} + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. 
+ * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) +{ + struct schedule_instruction * sinst; + + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_alu_instruction(s, sinst); + } else { + struct schedule_instruction **prgb; + struct schedule_instruction **palpha; + struct schedule_instruction *prev; +pair: + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + struct schedule_instruction * psirgb = *prgb; + struct schedule_instruction * psialpha = *palpha; + + if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P)) + continue; + + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + rc_insert_instruction(before->Prev, psirgb->Instruction); + commit_alu_instruction(s, psirgb); + commit_alu_instruction(s, psialpha); + goto success; + } + } + prev = NULL; + /* No success in pairing, now try to convert one of the RGB + * instructions to an Alpha so we can pair it with another RGB. + */ + if (s->ReadyRGB && s->ReadyRGB->NextReady) { + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + if ((*prgb)->NumWriteValues == 1) { + struct schedule_instruction * prgb_next; + if (!convert_rgb_to_alpha(s, *prgb)) + goto cont_loop; + prgb_next = (*prgb)->NextReady; + /* Add instruction to the Alpha ready list. 
*/ + (*prgb)->NextReady = s->ReadyAlpha; + s->ReadyAlpha = *prgb; + /* Remove instruction from the RGB ready list.*/ + if (prev) + prev->NextReady = prgb_next; + else + s->ReadyRGB = prgb_next; + goto pair; + } +cont_loop: + prev = *prgb; + } + } + /* Still no success in pairing, just take the first RGB + * or alpha instruction. */ + if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else if (s->ReadyAlpha) { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } else { + /* XXX Something real bad has happened: no list holds a + * ready instruction. Report it and bail out instead of + * using an uninitialized sinst below when asserts are + * compiled out. */ + assert(0); + rc_error(s->C, "%s: no ready ALU instruction\n", __FUNCTION__); + return; + } + + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_alu_instruction(s, sinst); + success: ; + } + /* If the instruction we just emitted uses a presubtract value, and + * the presubtract sources were written by the previous instruction, + * the previous instruction needs a nop. */ + presub_nop(before->Prev); +} + +/** + * Per-channel read callback used while scanning a block: records + * s->Current as a reader of the value held in file[index].chan and, when + * that value has a writer in this block, adds a dependency on it. + */ +static void scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** v = get_reg_valuep(s, file, index, chan); + struct reg_value_reader * reader; + + if (!v) + return; + + if (*v && (*v)->Writer == s->Current) { + /* The instruction reads and writes to a register component. + * In this case, we only want to increment dependencies by one. + */ + return; + } + + DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); + reader->Reader = s->Current; + /* Other pool allocations in this file are cleared explicitly; do the + * same for Next so the first reader of a value terminates the list. */ + reader->Next = NULL; + if (!*v) { + /* In this situation, the instruction reads from a register + * that hasn't been written to or read from in the current + * block. 
*/ + *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); + memset(*v, 0, sizeof(struct reg_value)); + (*v)->Readers = reader; + } else { + reader->Next = (*v)->Readers; + (*v)->Readers = reader; + /* Only update the current instruction's dependencies if the + * register it reads from has been written to in this block. */ + if ((*v)->Writer) { + s->Current->NumDependencies++; + } + } + (*v)->NumReaders++; + + if (s->Current->NumReadValues >= 12) { + rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); + } else { + s->Current->ReadValues[s->Current->NumReadValues++] = *v; + } +} + +static void scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + struct reg_value * newv; + + if (!pv) + return; + + DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); + memset(newv, 0, sizeof(*newv)); + + newv->Writer = s->Current; + + if (*pv) { + (*pv)->Next = newv; + s->Current->NumDependencies++; + } + + *pv = newv; + + if (s->Current->NumWriteValues >= 4) { + rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); + } else { + s->Current->WriteValues[s->Current->NumWriteValues++] = newv; + } +} + +static void is_rgb_to_alpha_possible_normal( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = userdata; + reader_data->Abort = 1; + +} + +static void schedule_block(struct r300_fragment_program_compiler * c, + struct rc_instruction * begin, struct rc_instruction * end) +{ + struct schedule_state s; + unsigned int ip; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + + /* Scan instructions for data dependencies */ + ip = 0; + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + s.Current = 
memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); + memset(s.Current, 0, sizeof(struct schedule_instruction)); + + s.Current->Instruction = inst; + inst->IP = ip++; + + DBG("%i: Scanning\n", inst->IP); + + /* The order of things here is subtle and maybe slightly + * counter-intuitive, to account for the case where an + * instruction writes to the same register as it reads + * from. */ + rc_for_all_writes_chan(inst, &scan_write, &s); + rc_for_all_reads_chan(inst, &scan_read, &s); + + DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); + + if (!s.Current->NumDependencies) + instruction_ready(&s, s.Current); + + /* Get global readers for possible RGB->Alpha conversion. */ + s.Current->GlobalReaders.ExitOnAbort = 1; + rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + is_rgb_to_alpha_possible_normal, + is_rgb_to_alpha_possible, NULL); + } + + /* Temporarily unlink all instructions */ + begin->Prev->Next = end; + end->Prev = begin->Prev; + + /* Schedule instructions back */ + while(!s.C->Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s, end); + + while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) + emit_one_alu(&s, end); + } +} + +static int is_controlflow(struct rc_instruction * inst) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + return opcode->IsFlowControl; + } + return 0; +} + +/** + * Scheduling pass entry point: walks the program, skips flow-control + * instructions, and hands each run of consecutive non-flow-control + * instructions to schedule_block(). + * + * @param cc Compiler; cast to struct r300_fragment_program_compiler. + * @param user Not used by this pass. + */ +void rc_pair_schedule(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct rc_instruction * inst = c->Base.Program.Instructions.Next; + + while(inst != 
&c->Base.Program.Instructions && !is_controlflow(inst)) + inst = inst->Next; + + DBG("Schedule one block\n"); + schedule_block(c, first, inst); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c new file mode 100644 index 00000000000..2dae56a2428 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. 
+ */
+static void final_rewrite(struct rc_sub_instruction *inst)
+{
+	struct rc_src_register tmp;
+
+	switch(inst->Opcode) {
+	case RC_OPCODE_ADD: /* ADD a, b  ->  MAD a, 1, b */
+		inst->SrcReg[2] = inst->SrcReg[1];
+		inst->SrcReg[1].File = RC_FILE_NONE;
+		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+		inst->SrcReg[1].Negate = RC_MASK_NONE;
+		inst->Opcode = RC_OPCODE_MAD;
+		break;
+	case RC_OPCODE_CMP: /* swap SrcReg[0] and SrcReg[2] */
+		tmp = inst->SrcReg[2];
+		inst->SrcReg[2] = inst->SrcReg[0];
+		inst->SrcReg[0] = tmp;
+		break;
+	case RC_OPCODE_MOV:
+		/* AMD say we should use CMP.
+		 * However, when we transform
+		 *  KIL -r0;
+		 * into
+		 *  CMP tmp, -r0, -r0, 0;
+		 *  KIL tmp;
+		 * we get incorrect behaviour on R500 when r0 == 0.0.
+		 * It appears that the R500 KIL hardware treats -0.0 as less
+		 * than zero.
+		 */
+		inst->SrcReg[1].File = RC_FILE_NONE;
+		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+		inst->SrcReg[2].File = RC_FILE_NONE;
+		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+		inst->Opcode = RC_OPCODE_MAD; /* MOV a  ->  MAD a, 1, 0 */
+		break;
+	case RC_OPCODE_MUL: /* MUL a, b  ->  MAD a, b, 0 */
+		inst->SrcReg[2].File = RC_FILE_NONE;
+		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+		inst->Opcode = RC_OPCODE_MAD;
+		break;
+	default:
+		/* nothing to do */
+		break;
+	}
+}
+
+
+/**
+ * Classify an instruction according to which ALUs etc. it needs
+ *
+ * All three outputs are always written.
+ *
+ * \param inst            instruction to inspect; it is not modified
+ * \param needrgb         set to 1 if the write mask touches X, Y or Z, if
+ *                        WriteALUResult is RC_ALURESULT_X, or if the
+ *                        opcode is DP3 or DP4; 0 otherwise
+ * \param needalpha       set to 1 if the write mask touches W, if
+ *                        WriteALUResult is RC_ALURESULT_W, or if the
+ *                        opcode is DP4 or one of the opcodes listed for
+ *                        istranscendent; 0 otherwise
+ * \param istranscendent  set to 1 for COS, EX2, LG2, RCP, RSQ and SIN,
+ *                        0 otherwise
+ */
+static void classify_instruction(struct rc_sub_instruction * inst,
+	int * needrgb, int * needalpha, int * istranscendent)
+{
+	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
+	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
+	*istranscendent = 0;
+
+	if (inst->WriteALUResult == RC_ALURESULT_X)
+		*needrgb = 1;
+	else if (inst->WriteALUResult == RC_ALURESULT_W)
+		*needalpha = 1;
+
+	switch(inst->Opcode) {
+	case RC_OPCODE_ADD:
+	case RC_OPCODE_CMP:
+	case RC_OPCODE_CND:
+	case RC_OPCODE_DDX:
+	case RC_OPCODE_DDY:
+	case RC_OPCODE_FRC:
+	case RC_OPCODE_MAD:
+	case RC_OPCODE_MAX:
+	case RC_OPCODE_MIN:
+	case RC_OPCODE_MOV:
+	case RC_OPCODE_MUL:
+		break; /* the write mask / ALU result alone decides */
+	case RC_OPCODE_COS:
+	case RC_OPCODE_EX2:
+	case RC_OPCODE_LG2:
+	case RC_OPCODE_RCP:
+	case RC_OPCODE_RSQ:
+	case RC_OPCODE_SIN:
+		*istranscendent = 1;
+		*needalpha = 1;
+		break;
+	case RC_OPCODE_DP4:
+		*needalpha = 1;
+		/* fall through */
+	case RC_OPCODE_DP3:
+		*needrgb = 1;
+		break;
+	default:
+		break;
+	}
+}
+/**
+ * Report which halves of a source its swizzle reads: *rgb is set to 1 if
+ * any of the four swizzle selectors is below 3, *alpha is set to 1 if any
+ * selector equals 3.  Larger selector values set neither.
+ * The outputs are only ever set, never cleared; the caller initialises
+ * them to 0.
+ */
+static void src_uses(struct rc_src_register src, unsigned int * rgb,
+	unsigned int * alpha)
+{
+	int j;
+	for(j = 0; j < 4; ++j) {
+		unsigned int swz = GET_SWZ(src.Swizzle, j);
+		if (swz < 3)
+			*rgb = 1;
+		else if (swz < 4)
+			*alpha = 1;
+	}
+}
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ */
+static void set_pair_instruction(struct r300_fragment_program_compiler *c,
+	struct rc_pair_instruction * pair,
+	struct rc_sub_instruction * inst)
+{
+	int needrgb, needalpha, istranscendent;
+	const struct rc_opcode_info * opcode;
+	int i;
+
+	memset(pair, 0, sizeof(struct rc_pair_instruction)); /* start from an all-zero pair */
+
+	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
+
+	if (needrgb) {
+		if (istranscendent)
+			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; /* the real opcode goes to the alpha half below */
+		else
+			pair->RGB.Opcode = inst->Opcode;
+		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+			pair->RGB.Saturate = 1;
+	}
+	if (needalpha) {
+		pair->Alpha.Opcode = inst->Opcode;
+		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+			pair->Alpha.Saturate = 1;
+	}
+
+	opcode = rc_get_opcode_info(inst->Opcode);
+
+	/* Presubtract handling:
+	 * We need to make sure that the values used by the presubtract
+	 * operation end up in src0 or src1. */
+	if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
+		/* rc_pair_alloc_source() will fill in data for
+		 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
+		int j;
+		for(j = 0; j < 3; j++) {
+			int src_regs;
+			if(inst->SrcReg[j].File != RC_FILE_PRESUB)
+				continue;
+
+			src_regs = rc_presubtract_src_reg_count(
+							inst->PreSub.Opcode);
+			for(i = 0; i < src_regs; i++) {
+				unsigned int rgb = 0;
+				unsigned int alpha = 0;
+				src_uses(inst->SrcReg[j], &rgb, &alpha);
+				if(rgb) {
+					pair->RGB.Src[i].File =
+						inst->PreSub.SrcReg[i].File;
+					pair->RGB.Src[i].Index =
+						inst->PreSub.SrcReg[i].Index;
+					pair->RGB.Src[i].Used = 1;
+				}
+				if(alpha) {
+					pair->Alpha.Src[i].File =
+						inst->PreSub.SrcReg[i].File;
+					pair->Alpha.Src[i].Index =
+						inst->PreSub.SrcReg[i].Index;
+					pair->Alpha.Src[i].Used = 1;
+				}
+			}
+		}
+	}
+
+	for(i = 0; i < opcode->NumSrcRegs; ++i) {
+		int source;
+		if (needrgb && !istranscendent) {
+			unsigned int srcrgb = 0;
+			unsigned int srcalpha = 0;
+			unsigned int srcmask = 0;
+			int j;
+			/* We don't care about the alpha channel here.  We only
+			 * want the part of the swizzle that writes to rgb,
+			 * since we are creating an rgb instruction. */
+			for(j = 0; j < 3; ++j) {
+				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+
+				if (swz < RC_SWIZZLE_W)
+					srcrgb = 1;
+				else if (swz == RC_SWIZZLE_W)
+					srcalpha = 1;
+
+				if (swz < RC_SWIZZLE_UNUSED)
+					srcmask |= 1 << j; /* channel j carries a real selector */
+			}
+			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+							inst->SrcReg[i].File, inst->SrcReg[i].Index);
+			if (source < 0) {
+				rc_error(&c->Base, "Failed to translate "
+							"rgb instruction.\n");
+				return;
+			}
+			pair->RGB.Arg[i].Source = source;
+			pair->RGB.Arg[i].Swizzle =
+				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
+			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+			/* Negate collapses to one flag: set if any used x/y/z channel is negated. */
+			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
+		}
+		if (needalpha) {
+			unsigned int srcrgb = 0;
+			unsigned int srcalpha = 0;
+			/* Transcendentals take swizzle channel 0, everything else channel 3. */
+			unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3);
+			if (swz < 3)
+				srcrgb = 1;
+			else if (swz < 4)
+				srcalpha = 1;
+			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+							inst->SrcReg[i].File, inst->SrcReg[i].Index);
+			if (source < 0) {
+				rc_error(&c->Base, "Failed to translate "
+							"alpha instruction.\n");
+				return;
+			}
+			pair->Alpha.Arg[i].Source = source;
+			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
+			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+			pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+		}
+	}
+
+	/* Destination handling */
+	if (inst->DstReg.File == RC_FILE_OUTPUT) {
+		if (inst->DstReg.Index == c->OutputDepth) {
+			pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+		} else {
+			/* Find the colour output (0..3) this register index maps to. */
+			for (i = 0; i < 4; i++) {
+				if (inst->DstReg.Index == c->OutputColor[i]) {
+					pair->RGB.Target = i;
+					pair->Alpha.Target = i;
+					pair->RGB.OutputWriteMask |=
+						inst->DstReg.WriteMask & RC_MASK_XYZ;
+					pair->Alpha.OutputWriteMask |=
+						GET_BIT(inst->DstReg.WriteMask, 3);
+					break;
+				}
+			}
+		}
+	} else {
+		if (needrgb) {
+			pair->RGB.DestIndex = inst->DstReg.Index;
+			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+		}
+
+		if (needalpha) {
+			pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
+			if (pair->Alpha.WriteMask) {
+				pair->Alpha.DestIndex = inst->DstReg.Index;
+			}
+		}
+	}
+
+	if (inst->WriteALUResult) {
+		pair->WriteALUResult = inst->WriteALUResult;
+		pair->ALUResultCompare = inst->ALUResultCompare;
+	}
+}
+/**
+ * Report features this translation does not handle.  Raises rc_error()
+ * and returns at the first problem found:
+ *  - RC_SATURATE_MINUS_PLUS_ONE on an opcode that has a destination
+ *  - relative addressing (RelAddr) on any of the opcode's source operands
+ * Nothing is modified; the caller sees the failure only through the
+ * compiler's error state.
+ */
+
+static void check_opcode_support(struct r300_fragment_program_compiler *c,
+				 struct rc_sub_instruction *inst)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+	if (opcode->HasDstReg) {
+		if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
+			rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
+			return;
+		}
+	}
+
+	for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+		if (inst->SrcReg[i].RelAddr) {
+			rc_error(&c->Base, "Fragment program does not support relative addressing "
+				 " of source operands.\n");
+			return;
+		}
+	}
+}
+
+
+/**
+ * Translate all ALU instructions into corresponding pair instructions,
+ * performing no other changes.
+ */ +void rc_pair_translate(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + + for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; + inst != &c->Base.Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * opcode; + struct rc_sub_instruction copy; + + if (inst->Type != RC_INSTRUCTION_NORMAL) + continue; + + opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) + continue; + + copy = inst->U.I; + + check_opcode_support(c, ©); + + final_rewrite(©); + inst->Type = RC_INSTRUCTION_PAIR; + set_pair_instruction(c, &inst->U.P, ©); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program.c b/src/gallium/drivers/r300/compiler/radeon_program.c new file mode 100644 index 00000000000..fe5756ebc45 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program.c @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+/**
+ * Transform the given clause in the following way:
+ *  1. Replace it with an empty clause
+ *  2. For every instruction in the original clause, try the given
+ *     transformations in order.
+ *  3. If one of the transformations returns GL_TRUE, assume that it
+ *     has emitted the appropriate instruction(s) into the new clause;
+ *     otherwise, copy the instruction verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void rc_local_transform(
+	struct radeon_compiler * c,
+	void *user)
+{
+	/* `user` is an array of transformations, terminated by an entry whose
+	 * function pointer is NULL. */
+	struct radeon_program_transformation *transformations =
+		(struct radeon_program_transformation*)user;
+	struct rc_instruction * inst = c->Program.Instructions.Next;
+
+	while(inst != &c->Program.Instructions) {
+		struct rc_instruction * current = inst;
+		int i;
+
+		inst = inst->Next; /* step first; presumably because a transformation may unlink `current` */
+
+		for(i = 0; transformations[i].function; ++i) {
+			struct radeon_program_transformation* t = transformations + i;
+
+			if (t->function(c, current, t->userData))
+				break; /* first transformation that returns non-zero wins */
+		}
+	}
+}
+/**
+ * Context passed to get_used_temporaries_cb(): Used is the caller's array
+ * of per-register component masks, UsedLength its number of entries.
+ */
+struct get_used_temporaries_data {
+	unsigned char * Used;
+	unsigned int UsedLength;
+};
+/**
+ * Register-access callback used by rc_get_used_temporaries(): ORs `mask`
+ * into Used[index] when the access is to RC_FILE_TEMPORARY.  Other
+ * register files and indices at or beyond UsedLength are ignored; `inst`
+ * is not used.
+ */
+static void get_used_temporaries_cb(
+	void * userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct get_used_temporaries_data * d = userdata;
+
+	if (file != RC_FILE_TEMPORARY)
+		return;
+
+	if (index >= d->UsedLength)
+		return;
+
+	d->Used[index] |= mask;
+}
+
+/**
+ * This function fills in the parameter 'used' with a writemask that
+ * represent which components of each temporary register are used by the
+ * program.  This is meant to be combined with rc_find_free_temporary_list as a
+ * more efficient version of rc_find_free_temporary.
+ * @param used The function does not initialize this parameter.
+ */
+void rc_get_used_temporaries(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length)
+{
+	struct rc_instruction * inst;
+	struct get_used_temporaries_data d;
+	d.Used = used; /* masks are OR-ed into the caller's array; it is not cleared here */
+	d.UsedLength = used_length;
+
+	for(inst = c->Program.Instructions.Next;
+			inst != &c->Program.Instructions; inst = inst->Next) {
+
+		/* Reads and writes both count as "used". */
+		rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d);
+		rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d);
+	}
+}
+
+/* Search a list of used temporaries for a free one
+ * \sa rc_get_used_temporaries
+ * @note If this functions finds a free temporary, it will mark it as used
+ * in the used temporary list (param 'used')
+ * @param used list of used temporaries
+ * @param used_length number of items in param 'used'
+ * @param mask which components must be free in the temporary index that is
+ * returned.
+ * @return -1 If there are no more free temporaries, otherwise the index of
+ * a temporary register where the components specified in param 'mask' are
+ * not being used.
+ */
+int rc_find_free_temporary_list(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length,
+	unsigned int mask)
+{
+	int i;
+	for(i = 0; i < used_length; i++) {
+		if ((~used[i] & mask) == mask) { /* every requested component is free */
+			used[i] |= mask;
+			return i;
+		}
+	}
+	return -1;
+}
+/**
+ * Return the index of a temporary register that no instruction of the
+ * program reads or writes in any component.  Rescans the whole program
+ * on every call.
+ * If none is free among the first RC_REGISTER_MAX_INDEX registers,
+ * rc_error() is raised and 0 is returned.
+ * NOTE(review): that 0 is indistinguishable from a valid index, so the
+ * failure is only visible through the compiler's error state.
+ */
+unsigned int rc_find_free_temporary(struct radeon_compiler * c)
+{
+	unsigned char used[RC_REGISTER_MAX_INDEX];
+	int free;
+
+	memset(used, 0, sizeof(used));
+
+	rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX);
+
+	free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX,
+								RC_MASK_XYZW);
+	if (free < 0) {
+		rc_error(c, "Ran out of temporary registers\n");
+		return 0;
+	}
+	return free;
+}
+/**
+ * Allocate a zeroed instruction from the compiler's memory pool and give
+ * it defaults: opcode RC_OPCODE_ILLEGAL_OPCODE, a full XYZW destination
+ * write mask and the XYZW swizzle on all three sources.  The instruction
+ * is not linked into any list.
+ */
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
+{
+	struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
+
+	memset(inst, 0, sizeof(struct rc_instruction));
+
+	inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+	inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+	inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW;
+	inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW;
+
+	return inst;
+}
+/**
+ * Link `inst` into the doubly linked list directly behind `after`.
+ */
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
+{
+	inst->Prev = after;
+	inst->Next = after->Next;
+
+	inst->Prev->Next = inst;
+	inst->Next->Prev = inst;
+}
+/**
+ * Allocate an instruction with rc_alloc_instruction(), insert it directly
+ * behind `after` and return it.
+ */
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+	struct rc_instruction * inst = rc_alloc_instruction(c);
+
+	rc_insert_instruction(after, inst);
+
+	return inst;
+}
+/**
+ * Unlink `inst` from its list.  The instruction's own Prev and Next
+ * pointers are left as they were, and nothing is freed.
+ */
+void rc_remove_instruction(struct rc_instruction * inst)
+{
+	inst->Prev->Next = inst->Next;
+	inst->Next->Prev = inst->Prev;
+}
+
+/**
+ * Return the number of instructions in the program.
+ */
+unsigned int rc_recompute_ips(struct radeon_compiler * c)
+{
+	unsigned int ip = 0;
+	struct rc_instruction * inst;
+
+	/* Renumber every instruction in list order, starting at 0. */
+	for(inst = c->Program.Instructions.Next;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Next) {
+		inst->IP = ip++;
+	}
+
+	/* The list head is not an instruction; give it a marker value
+	 * (presumably so that accidental use of its IP stands out). */
+	c->Program.Instructions.IP = 0xcafedead;
+
+	return ip;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h
new file mode 100644
index 00000000000..b899eccbf53
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include +#include + +#include "radeon_opcodes.h" +#include "radeon_code.h" +#include "radeon_program_constants.h" +#include "radeon_program_pair.h" + +struct radeon_compiler; + +struct rc_src_register { + unsigned int File:4; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int Swizzle:12; + + /** Take the component-wise absolute value */ + unsigned int Abs:1; + + /** Post-Abs negation. */ + unsigned int Negate:4; +}; + +struct rc_dst_register { + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:4; +}; + +struct rc_presub_instruction { + rc_presubtract_op Opcode; + struct rc_src_register SrcReg[2]; +}; + +/** + * Instructions are maintained by the compiler in a doubly linked list + * of these structures. + * + * This instruction format is intended to be expanded for hardware-specific + * trickery. At different stages of compilation, a different set of + * instruction types may be valid. + */ +struct rc_sub_instruction { + struct rc_src_register SrcReg[3]; + struct rc_dst_register DstReg; + + /** + * Opcode of this instruction, according to \ref rc_opcode enums. + */ + unsigned int Opcode:8; + + /** + * Saturate each value of the result to the range [0,1] or [-1,1], + * according to \ref rc_saturate_mode enums. + */ + unsigned int SaturateMode:2; + + /** + * Writing to the special register RC_SPECIAL_ALU_RESULT + */ + /*@{*/ + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; + /*@}*/ + + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + unsigned int TexSrcUnit:5; + + /** Source texture target, one of the \ref rc_texture_target enums */ + unsigned int TexSrcTarget:3; + + /** True if tex instruction should do shadow comparison */ + unsigned int TexShadow:1; + + /**R500 Only. 
How to swizzle the result of a TEX lookup*/ + unsigned int TexSwizzle:12; + /*@}*/ + + /** This holds information about the presubtract operation used by + * this instruction. */ + struct rc_presub_instruction PreSub; +}; + +typedef enum { + RC_INSTRUCTION_NORMAL = 0, + RC_INSTRUCTION_PAIR +} rc_instruction_type; + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + + rc_instruction_type Type; + union { + struct rc_sub_instruction I; + struct rc_pair_instruction P; + } U; + + /** + * Warning: IPs are not stable. If you want to use them, + * you need to recompute them at the beginning of each pass + * using \ref rc_recompute_ips + */ + unsigned int IP; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; +}; + +/** + * A transformation that can be passed to \ref rc_local_transform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return true, or return false if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. 
+ */
+struct radeon_program_transformation {
+	int (*function)(
+		struct radeon_compiler*,
+		struct rc_instruction*,
+		void*);
+	void *userData; /* passed back to function as its last argument */
+};
+
+void rc_local_transform(
+	struct radeon_compiler *c,
+	void *user); /* user: array of radeon_program_transformation, ended by a NULL function */
+
+void rc_get_used_temporaries(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length); /* ORs component masks into used[]; does not clear it */
+
+int rc_find_free_temporary_list(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length,
+	unsigned int mask); /* returns -1 when no entry has all of `mask` free */
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c);
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
+void rc_remove_instruction(struct rc_instruction * inst);
+
+unsigned int rc_recompute_ips(struct radeon_compiler * c); /* returns the instruction count */
+
+void rc_print_program(const struct rc_program *prog);
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
+#endif
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
new file mode 100644
index 00000000000..9fc991166a3
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -0,0 +1,1154 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. 
+ *
+ */
+
+#include "radeon_program_alu.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+
+/**
+ * Insert a new one-source instruction directly behind `after` and return
+ * it.  Fields not set here keep the defaults from rc_alloc_instruction().
+ */
+static struct rc_instruction *emit1(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+	fpi->U.I.Opcode = Opcode;
+	fpi->U.I.SaturateMode = Saturate;
+	fpi->U.I.DstReg = DstReg;
+	fpi->U.I.SrcReg[0] = SrcReg;
+	return fpi;
+}
+/**
+ * Two-source variant of emit1().
+ */
+static struct rc_instruction *emit2(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+	fpi->U.I.Opcode = Opcode;
+	fpi->U.I.SaturateMode = Saturate;
+	fpi->U.I.DstReg = DstReg;
+	fpi->U.I.SrcReg[0] = SrcReg0;
+	fpi->U.I.SrcReg[1] = SrcReg1;
+	return fpi;
+}
+/**
+ * Three-source variant of emit1().
+ */
+static struct rc_instruction *emit3(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
+	struct rc_src_register SrcReg2)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+	fpi->U.I.Opcode = Opcode;
+	fpi->U.I.SaturateMode = Saturate;
+	fpi->U.I.DstReg = DstReg;
+	fpi->U.I.SrcReg[0] = SrcReg0;
+	fpi->U.I.SrcReg[1] = SrcReg1;
+	fpi->U.I.SrcReg[2] = SrcReg2;
+	return fpi;
+}
+/**
+ * Build a destination operand for temporary register `index` with write
+ * mask `mask`.
+ */
+static struct rc_dst_register dstregtmpmask(int index, int mask)
+{
+	struct rc_dst_register dst = {0};
+	dst.File = RC_FILE_TEMPORARY;
+	dst.Index = index;
+	dst.WriteMask = mask;
+	return dst;
+}
+/**
+ * Register-less source operands (RC_FILE_NONE): builtin_zero carries the
+ * 0000 swizzle, builtin_one the 1111 swizzle, and srcreg_undefined is the
+ * template (XYZW swizzle, everything else zero) that srcreg() and
+ * srcregswz() start from.
+ */
+static const struct rc_src_register builtin_zero = {
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_0000
+};
+static const struct rc_src_register builtin_one = {
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_1111
+};
+static const struct rc_src_register srcreg_undefined = {
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_XYZW
+};
+/**
+ * Source operand for register `index` of `file` with the XYZW swizzle and
+ * no Abs/Negate.
+ */
+static struct rc_src_register srcreg(int file, int index)
+{
+	struct rc_src_register src = srcreg_undefined;
+	src.File = file;
+	src.Index = index;
+	return src;
+}
+/**
+ * Like srcreg(), but with the caller's swizzle `swz`.
+ */
+static struct rc_src_register srcregswz(int file, int index, int swz)
+{
+	struct rc_src_register src = srcreg_undefined;
+	src.File = file;
+	src.Index = index;
+	src.Swizzle = swz;
+	return src;
+}
+/**
+ * Return a copy of `reg` with Abs set and all negation bits cleared.
+ */
+static struct rc_src_register absolute(struct rc_src_register reg)
+{
+	struct rc_src_register newreg = reg;
+	newreg.Abs = 1;
+	newreg.Negate = RC_MASK_NONE;
+	return newreg;
+}
+/**
+ * Return a copy of `reg` with the negation bit of all four components
+ * toggled.
+ */
+static struct rc_src_register negate(struct rc_src_register reg)
+{
+	struct rc_src_register newreg = reg;
+	newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
+	return newreg;
+}
+/**
+ * Return a copy of `reg` whose swizzle is combine_swizzles4() applied to
+ * the existing swizzle and the selectors x, y, z, w.  Abs and Negate are
+ * copied unchanged.
+ */
+static struct rc_src_register swizzle(struct rc_src_register reg,
+		rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
+{
+	struct rc_src_register swizzled = reg;
+	swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
+	return swizzled;
+}
+/**
+ * swizzle() with the same selector in all four positions.
+ */
+static struct rc_src_register swizzle_smear(struct rc_src_register reg,
+		rc_swizzle x)
+{
+	return swizzle(reg, x, x, x, x);
+}
+/**
+ * Shorthands for swizzle_smear() with X, Y, Z and W respectively.
+ */
+static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_X);
+}
+
+static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_Y);
+}
+
+static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_Z);
+}
+
+static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_W);
+}
+/**
+ * Return 1 if the instruction's destination may hold an intermediate
+ * result: it must be a temporary, and none of the opcode's source
+ * operands may be a temporary with the same index.  Return 0 otherwise.
+ * Asserts that the opcode has a destination register.
+ */
+static int is_dst_safe_to_reuse(struct rc_instruction *inst)
+{
+	const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned i;
+
+	assert(info->HasDstReg);
+
+	if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
+		return 0;
+
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+		    inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index)
+			return 0;
+	}
+
+	return 1;
+}
+/**
+ * Pick a temporary for an intermediate result: the instruction's own
+ * destination index when is_dst_safe_to_reuse() allows it, otherwise a
+ * register from rc_find_free_temporary().  The returned operand carries
+ * the instruction's write mask.
+ */
+static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
+					       struct rc_instruction *inst)
+{
+	unsigned tmp;
+
+	if (is_dst_safe_to_reuse(inst))
+		tmp = inst->U.I.DstReg.Index;
+	else
+		tmp = rc_find_free_temporary(c);
+
+	return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask);
+}
+/**
+ * Replace ABS by a MOV of |src0|: emitted in front of `inst`, which is
+ * then removed.
+ */
+static void transform_ABS(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src = inst->U.I.SrcReg[0];
+	src.Abs = 1;
+	src.Negate = RC_MASK_NONE;
+	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
+	rc_remove_instruction(inst);
+}
+/**
+ * Replace CEIL by FRC tmp, -src0 followed by ADD dst, src0, tmp.
+ */
+static void transform_CEIL(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* Assuming:
+	 *     ceil(x) = -floor(-x)
+	 *
+	 * After inlining floor:
+	 *     ceil(x) = -(-x-frac(-x))
+	 *
+	 * After simplification:
+	 *     ceil(x) = x+frac(-x)
+	 */
+
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
+	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
+	rc_remove_instruction(inst);
+}
+/**
+ * Replace CLAMP by a MIN/MAX pair.
+ */
+static void transform_CLAMP(struct radeon_compiler *c,
+	struct rc_instruction *inst)
+{
+	/* CLAMP dst, src, min, max
+	 *    into:
+	 * MIN tmp, src, max
+	 * MAX dst, tmp, min
+	 */
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
+		inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
+	emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
+	rc_remove_instruction(inst);
+}
+/**
+ * Replace DP2 by a DP3 whose sources have the z and w swizzle slots
+ * forced to RC_SWIZZLE_ZERO (and their negate bits cleared), so only the
+ * x and y products contribute.
+ */
+static void transform_DP2(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src0 = inst->U.I.SrcReg[0];
+	struct rc_src_register src1 = inst->U.I.SrcReg[1];
+	src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+	src0.Swizzle &= ~(63 << (3 * 2)); /* clear the 3-bit z and w selectors */
+	src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+	src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+	src1.Swizzle &= ~(63 << (3 * 2));
+	src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+	emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+	rc_remove_instruction(inst);
+}
+/**
+ * Replace DPH by a DP4 whose first source has its w swizzle slot forced
+ * to RC_SWIZZLE_ONE (negate bit cleared).
+ */
+static void transform_DPH(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src0 = inst->U.I.SrcReg[0];
+	src0.Negate &= ~RC_MASK_W;
+	src0.Swizzle &= ~(7 << (3 * 3)); /* clear the 3-bit w selector */
+	src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
+	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
+	rc_remove_instruction(inst);
+}
+
+/**
+ * [1, src0.y*src1.y, src0.z, src1.w]
+ * So basically MUL with lotsa swizzling.
+ */
+static void transform_DST(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* (1, src0.y, src0.z, 1) * (1, src1.y, 1, src1.w) */
+	emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
+		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
+	rc_remove_instruction(inst);
+}
+/**
+ * Replace FLR by FRC tmp, src0 followed by ADD dst, src0, -tmp,
+ * i.e. floor(x) = x - frac(x).
+ */
+static void transform_FLR(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
+	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
+	rc_remove_instruction(inst);
+}
+
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ *  tmp = VectorLoad(op0);
+ *  if (tmp.x < 0) tmp.x = 0;
+ *  if (tmp.y < 0) tmp.y = 0;
+ *  if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ *  else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ *  result.x = 1.0;
+ *  result.y = tmp.x;
+ *  result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ *  result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ */ +static void transform_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + unsigned int constant; + unsigned int constant_swizzle; + unsigned int temp; + struct rc_src_register srctemp; + + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); + + if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { + struct rc_instruction * inst_mov; + + inst_mov = emit1(c, inst, + RC_OPCODE_MOV, 0, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); + + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + temp = inst->U.I.DstReg.Index; + srctemp = srcreg(RC_FILE_TEMPORARY, temp); + + /* tmp.x = max(0.0, Src.x); */ + /* tmp.y = max(0.0, Src.y); */ + /* tmp.z = clamp(Src.w, -128+eps, 128-eps); the MAX leaves the lower-clamped value in tmp.w, the MIN writes the result to tmp.z */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(temp, RC_MASK_XYW), + inst->U.I.SrcReg[0], + swizzle(srcreg(RC_FILE_CONSTANT, constant), + RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, + dstregtmpmask(temp, RC_MASK_Z), + swizzle_wwww(srctemp), + negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); + + /* tmp.w = Pow(tmp.y, tmp.z), computed as EX2(LG2(tmp.y) * tmp.z) */ + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_yyyy(srctemp)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_wwww(srctemp), + swizzle_zzzz(srctemp)); + emit1(c, inst->Prev, RC_OPCODE_EX2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_wwww(srctemp)); + + /* tmp.z = (tmp.x > 0) ? 
tmp.w : 0.0 */ + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_Z), + negate(swizzle_xxxx(srctemp)), + swizzle_wwww(srctemp), + builtin_zero); + + /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_XYW), + swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); + + rc_remove_instruction(inst); +} + +static void transform_LRP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + dst, + inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); + + rc_remove_instruction(inst); +} + +static void transform_POW(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); + tempdst.WriteMask = RC_MASK_W; + tempsrc.Swizzle = RC_SWIZZLE_WWWW; + + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); + emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); + + rc_remove_instruction(inst); +} + +static void transform_RSQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); +} + +static void transform_SEQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + 
negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SFL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero); + rc_remove_instruction(inst); +} + +static void transform_SGE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SLT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), 
builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SNE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * CMP tmp0, -x, 1, 0 + * CMP tmp1, x, 1, 0 + * ADD result, tmp0, -tmp1; + */ + struct rc_dst_register dst0; + unsigned tmp1; + + /* 0 < x */ + dst0 = try_to_reuse_dst(c, inst); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dst0, + negate(inst->U.I.SrcReg[0]), + builtin_one, + builtin_zero); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_one, + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. 
*/ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst0.Index), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + +static void transform_SUB(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.Opcode = RC_OPCODE_ADD; + inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); +} + +static void transform_SWZ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.Opcode = RC_OPCODE_MOV; +} + +static void transform_XPD(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); + + rc_remove_instruction(inst); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, CEIL, CLAMP, DP2, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SSG, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, DP4, MIN, MAX, LG2, EX2, CMP + * + * Transforms RSQ to Radeon's native RSQ by explicitly setting + * absolute value. + * + * @note should be applicable to R300 and R500 fragment programs. 
+ */ +int radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; + case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_DST: transform_DST(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_POW: transform_POW(c, inst); return 1; + case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; + case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; + case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; + case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; + case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; + case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; + default: + return 0; + } +} + + +static void transform_r300_vertex_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Note: r500 can take absolute values, but r300 cannot. */ + inst->U.I.Opcode = RC_OPCODE_MAX; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_CMP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* There is no decent CMP available, so let's rig one up. + * CMP is defined as dst = src0 < 0.0 ? 
src1 : src2 + * The following sequence consumes zero to two temps and two extra slots + * (the second temp and the second slot is consumed by transform_LRP), + * but should be equivalent: + * + * SLT tmp0, src0, 0.0 + * LRP dst, tmp0, src1, src2 + * + * Yes, I know, I'm a mad scientist. ~ C. & M. */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + /* SLT tmp0, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dst, + inst->U.I.SrcReg[0], builtin_zero); + + /* LRP dst, tmp0, src1, src2 */ + transform_LRP(c, + emit3(c, inst->Prev, RC_OPCODE_LRP, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_DP2(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_instruction *next_inst = inst->Next; + transform_DP2(c, inst); + next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; +} + +static void transform_r300_vertex_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + unsigned constant_swizzle; + int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, + 0.0000000000000000001, + &constant_swizzle); + + /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; + emit1(c, inst->Prev, RC_OPCODE_MOV, 0, + dst, + inst->U.I.SrcReg[0]); + + /* MAX dst.y, src, 0.00...001 */ + emit2(c, inst->Prev, 
RC_OPCODE_MAX, 0, + dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); +} + +static void transform_r300_vertex_SEQ(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x = y <==> x >= y && y >= x */ + int tmp = rc_find_free_temporary(c); + + /* tmp = (x >= y) */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* dst = (y >= x) */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x && y = x * y */ + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_SNE(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x != y <==> x < y || y < x */ + int tmp = rc_find_free_temporary(c); + + /* x < y */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* y < x */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x || y = max(x, y) */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* x > y <==> -x < -y */ + inst->U.I.Opcode = RC_OPCODE_SLT; + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* x <= y <==> -x >= -y */ + 
inst->U.I.Opcode = RC_OPCODE_SGE; + 
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * SLT tmp0, 0, x; + * SLT tmp1, x, 0; + * ADD result, tmp0, -tmp1; + * + * tmp0 is requested exactly once via try_to_reuse_dst(); + * tmp1 comes from rc_find_free_temporary(). + */ + struct rc_dst_register dst0; + unsigned tmp1; + + /* 0 < x */ + dst0 = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dst0, + builtin_zero, + inst->U.I.SrcReg[0]); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst0.Index), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + +/** + * For use with rc_local_transform, this transforms non-native ALU + * instructions of the r300 up to r500 vertex engine. 
+ */ +int r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; + case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; + case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; + case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_SEQ: + if (!c->is_r500) { + transform_r300_vertex_SEQ(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1; + case RC_OPCODE_SNE: + if (!c->is_r500) { + transform_r300_vertex_SNE(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; + default: + return 0; + } +} + +static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) +{ + static const float SinCosConsts[2][4] = { + { + 1.273239545, /* 4/PI */ + -0.405284735, /* -4/(PI*PI) */ + 3.141592654, /* PI */ + 0.2225 /* weight */ + }, + { + 0.75, + 0.5, + 0.159154943, /* 1/(2*PI) */ + 6.283185307 /* 2*PI */ + } + }; + int i; + + for(i = 0; i < 2; ++i) + constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); +} + +/** + * Approximate sin(x), where x is clamped to (-pi/2, pi/2). 
+ * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx( + struct radeon_compiler* c, struct rc_instruction * inst, + struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) +{ + unsigned int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle_xxxx(src), + srcreg(RC_FILE_CONSTANT, constants[0])); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + absolute(swizzle_xxxx(src)), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), + absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))), + negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MOV, ADD, MUL, MAD, FRC + */ +int r300_transform_trig_simple(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + unsigned int constants[2]; + unsigned int tempreg; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + tempreg = rc_find_free_temporary(c); + + sincos_constants(c, constants); + + if (inst->U.I.Opcode == RC_OPCODE_COS) { + /* MAD tmp.w, src, 1/(2*PI), 0.75 */ + /* FRC tmp.w, tmp.w */ + /* MAD tmp.w, tmp.w, 2*PI, -PI */ + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + 
swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1]))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1]))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } else { + struct rc_dst_register dst; + + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + 
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + dst = inst->U.I.DstReg; + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; + sin_approx(c, inst, dst, + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; + sin_approx(c, inst, dst, + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } + + rc_remove_instruction(inst); + + return 1; +} + +static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, + struct rc_instruction *inst, + unsigned srctmp) +{ + if (inst->U.I.Opcode == RC_OPCODE_COS) { + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, + inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { + struct rc_dst_register moddst = inst->U.I.DstReg; + + if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { + moddst.WriteMask = RC_MASK_X; + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { + moddst.WriteMask = RC_MASK_Y; + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + } + + rc_remove_instruction(inst); +} + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! 
+ */ +int radeonTransformTrigScale(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + static const float RCP_2PI = 0.15915494309189535; + unsigned int temp; + unsigned int constant; + unsigned int constant_swizzle; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * so that the input to COS and SIN is always in the range [-PI, PI]. + * SCS is replaced by one COS and one SIN instruction. 
+ */ +int r300_transform_trig_scale_vertex(struct radeon_compiler *c, + struct rc_instruction *inst, + void *unused) +{ + static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; + unsigned int temp; + unsigned int constant; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + /* Repeat x in the range [-PI, PI]: + * + * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI + */ + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); + + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); + + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} + +/** + * Rewrite DDX/DDY instructions to properly work with r5xx shaders. + * The r5xx MDH/MDV instruction provides per-quad partial derivatives. + * It takes the form A*B+C. A and C are set by setting src0. B should be -1. + * + * @warning This explicitly changes the form of DDX and DDY! 
+ */ + +int radeonTransformDeriv(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) + return 0; + + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + return 1; +} + +/** + * IF Temp[0].x -\ + * KILP - > KIL -abs(Temp[0].x) + * ENDIF -/ + * + * This needs to be done in its own pass, because it modifies the instructions + * before and after KILP. + */ +void rc_transform_KILP(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode != RC_OPCODE_KILP) + continue; + + inst->U.I.Opcode = RC_OPCODE_KIL; + + if (inst->Prev->U.I.Opcode != RC_OPCODE_IF + || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { + inst->U.I.SrcReg[0] = negate(builtin_one); + } else { + + inst->U.I.SrcReg[0] = + negate(absolute(inst->Prev->U.I.SrcReg[0])); + /* Remove IF */ + rc_remove_instruction(inst->Prev); + /* Remove ENDIF */ + rc_remove_instruction(inst->Next); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.h b/src/gallium/drivers/r300/compiler/radeon_program_alu.h new file mode 100644 index 00000000000..b5f361e624f --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +int radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_trig_simple( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int radeonTransformTrigScale( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_trig_scale_vertex( + struct radeon_compiler *c, + struct rc_instruction *inst, + void*); + +int radeonTransformDeriv( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +void rc_transform_KILP(struct radeon_compiler * c, + void *user); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h new file mode 100644 index 00000000000..24577333450 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_PROGRAM_CONSTANTS_H +#define RADEON_PROGRAM_CONSTANTS_H + +typedef enum { + RC_SATURATE_NONE = 0, + RC_SATURATE_ZERO_ONE, + RC_SATURATE_MINUS_PLUS_ONE +} rc_saturate_mode; + +typedef enum { + RC_TEXTURE_2D_ARRAY, + RC_TEXTURE_1D_ARRAY, + RC_TEXTURE_CUBE, + RC_TEXTURE_3D, + RC_TEXTURE_RECT, + RC_TEXTURE_2D, + RC_TEXTURE_1D +} rc_texture_target; + +typedef enum { + /** + * Used to indicate unused register descriptions and + * source registers that use a constant swizzle. + */ + RC_FILE_NONE = 0, + RC_FILE_TEMPORARY, + + /** + * Input register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_INPUT, + + /** + * Output register. + * + * \note The compiler attaches no implicit semantics to output registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_OUTPUT, + RC_FILE_ADDRESS, + + /** + * Indicates a constant from the \ref rc_constant_list . + */ + RC_FILE_CONSTANT, + + /** + * Indicates a special register, see RC_SPECIAL_xxx. + */ + RC_FILE_SPECIAL, + + /** + * Indicates this register should use the result of the presubtract + * operation. 
+ */ + RC_FILE_PRESUB +} rc_register_file; + +enum { + /** R500 fragment program ALU result "register" */ + RC_SPECIAL_ALU_RESULT = 0, + + /** Must be last */ + RC_NUM_SPECIAL_REGISTERS +}; + +#define RC_REGISTER_INDEX_BITS 10 +#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) + +typedef enum { + RC_SWIZZLE_X = 0, + RC_SWIZZLE_Y, + RC_SWIZZLE_Z, + RC_SWIZZLE_W, + RC_SWIZZLE_ZERO, + RC_SWIZZLE_ONE, + RC_SWIZZLE_HALF, + RC_SWIZZLE_UNUSED +} rc_swizzle; + +#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) +#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) +#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) +#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) +#define SET_SWZ(swz, idx, newv) \ + do { \ + (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \ + } while(0) + +#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z) +#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) +#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) +#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) +#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) +#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) +#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF) +#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED) + +/** + * \name Bitmasks for components of vectors. + * + * Used for write masks, negation masks, etc. 
+ */ +/*@{*/ +#define RC_MASK_NONE 0 +#define RC_MASK_X 1 +#define RC_MASK_Y 2 +#define RC_MASK_Z 4 +#define RC_MASK_W 8 +#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) +#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) +#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) +#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) +/*@}*/ + +typedef enum { + RC_ALURESULT_NONE = 0, + RC_ALURESULT_X, + RC_ALURESULT_W +} rc_write_aluresult; + +typedef enum { + RC_PRESUB_NONE = 0, + + /** 1 - 2 * src0 */ + RC_PRESUB_BIAS, + + /** src1 - src0 */ + RC_PRESUB_SUB, + + /** src1 + src0 */ + RC_PRESUB_ADD, + + /** 1 - src0 */ + RC_PRESUB_INV +} rc_presubtract_op; + +static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ + switch(op){ + case RC_PRESUB_BIAS: + case RC_PRESUB_INV: + return 1; + case RC_PRESUB_ADD: + case RC_PRESUB_SUB: + return 2; + default: + return 0; + } +} + +#define RC_SOURCE_NONE 0x0 +#define RC_SOURCE_RGB 0x1 +#define RC_SOURCE_ALPHA 0x2 + +#endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.c b/src/gallium/drivers/r300/compiler/radeon_program_pair.c new file mode 100644 index 00000000000..52315957520 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.c @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2008-2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler_util.h"
+
+#include <stdlib.h>
+
+/**
+ * Return the source slot where we installed the given register access,
+ * or -1 if no slot was free anymore (0 if there was nothing to install).
+ */
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+	unsigned int rgb, unsigned int alpha,
+	rc_register_file file, unsigned int index)
+{
+	int candidate = -1;
+	int candidate_quality = -1;
+	unsigned int alpha_used = 0;
+	unsigned int rgb_used = 0;
+	int i;
+
+	if ((!rgb && !alpha) || file == RC_FILE_NONE)
+		return 0; /* nothing is read from a register, so no slot is touched */
+
+	/* Make sure only one presubtract operation is used per instruction. */
+	if (file == RC_FILE_PRESUB) {
+		if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used
+			&& index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+				return -1;
+		}
+
+		if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used
+			&& index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+				return -1;
+		}
+	}
+
+	for(i = 0; i < 3; ++i) { /* only the three regular slots are candidates */
+		int q = 0; /* +1 for each half that already holds exactly this register */
+		if (rgb) {
+			if (pair->RGB.Src[i].Used) {
+				if (pair->RGB.Src[i].File != file ||
+				    pair->RGB.Src[i].Index != index) {
+					rgb_used++;
+					continue;
+				}
+				q++;
+			}
+		}
+		if (alpha) {
+			if (pair->Alpha.Src[i].Used) {
+				if (pair->Alpha.Src[i].File != file ||
+				    pair->Alpha.Src[i].Index != index) {
+					alpha_used++;
+					continue;
+				}
+				q++;
+			}
+		}
+		if (q > candidate_quality) {
+			candidate_quality = q;
+			candidate = i;
+		}
+	}
+
+	if (file == RC_FILE_PRESUB) {
+		candidate = RC_PAIR_PRESUB_SRC;
+	} else if (candidate < 0 || (rgb && rgb_used > 2)
+			|| (alpha && alpha_used > 2)) {
+		return -1;
+	}
+
+	/* candidate >= 0 */
+
+	if (rgb) {
+		pair->RGB.Src[candidate].Used = 1;
+		pair->RGB.Src[candidate].File = file;
+		pair->RGB.Src[candidate].Index = index;
+		if (candidate == RC_PAIR_PRESUB_SRC) {
+			/* For registers with the RC_FILE_PRESUB file,
+			 * the index stores the presubtract op. */
+			int src_regs = rc_presubtract_src_reg_count(index);
+			for(i = 0; i < src_regs; i++) {
+				pair->RGB.Src[i].Used = 1;
+			}
+		}
+	}
+	if (alpha) {
+		pair->Alpha.Src[candidate].Used = 1;
+		pair->Alpha.Src[candidate].File = file;
+		pair->Alpha.Src[candidate].Index = index;
+		if (candidate == RC_PAIR_PRESUB_SRC) {
+			/* For registers with the RC_FILE_PRESUB file,
+			 * the index stores the presubtract op. */
+			int src_regs = rc_presubtract_src_reg_count(index);
+			for(i=0; i < src_regs; i++) {
+				pair->Alpha.Src[i].Used = 1;
+			}
+		}
+	}
+
+	return candidate;
+}
+
+static void pair_foreach_source_callback(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb,
+	unsigned int swz,
+	unsigned int src)
+{
+	/* swz > 3 means that the swizzle is either not used, or a constant
+	 * swizzle (e.g. 0, 1, 0.5). */
+	if(swz > 3)
+		return;
+
+	if(swz == RC_SWIZZLE_W) { /* W is served by the alpha source slots */
+		if (src == RC_PAIR_PRESUB_SRC) {
+			unsigned int i;
+			unsigned int src_count = rc_presubtract_src_reg_count(
+				pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
+			for(i = 0; i < src_count; i++) {
+				cb(data, &pair->Alpha.Src[i]);
+			}
+		} else {
+			cb(data, &pair->Alpha.Src[src]);
+		}
+	} else { /* X, Y and Z are served by the RGB source slots */
+		if (src == RC_PAIR_PRESUB_SRC) {
+			unsigned int i;
+			unsigned int src_count = rc_presubtract_src_reg_count(
+				pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
+			for(i = 0; i < src_count; i++) {
+				cb(data, &pair->RGB.Src[i]);
+			}
+		}
+		else {
+			cb(data, &pair->RGB.Src[src]);
+		}
+	}
+}
+
+void rc_pair_foreach_source_that_alpha_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb)
+{
+	unsigned int i;
+	const struct rc_opcode_info * info =
+				rc_get_opcode_info(pair->Alpha.Opcode);
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		pair_foreach_source_callback(pair, data, cb,
+					GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0),
+					pair->Alpha.Arg[i].Source);
+	}
+}
+
+void rc_pair_foreach_source_that_rgb_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb)
+{
+	unsigned int i;
+	const struct rc_opcode_info * info =
+				rc_get_opcode_info(pair->RGB.Opcode);
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int chan;
+		unsigned int swz = RC_SWIZZLE_UNUSED;
+		/* Stop at the first selector that is X, Y, Z or W; it tells
+		 * whether an RGB or an alpha slot is read. We assume here that
+		 * X/Y/Z and W selectors are never mixed within one argument. */
+		for(chan = 0; chan < 4; chan++) {
+			swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
+			if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+			|| swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
+				break; /* was "continue", which never ended the search */
+		}
+		pair_foreach_source_callback(pair, data, cb,
+					swz,
+					pair->RGB.Arg[i].Source);
+	}
+}
+
+struct rc_pair_instruction_source * rc_pair_get_src(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_arg * arg)
+{
+	unsigned int type;
+
+	type = rc_source_type_swz(arg->Swizzle);
+
+	if (type & RC_SOURCE_RGB) {
+		return &pair_inst->RGB.Src[arg->Source];
+	} else if (type & RC_SOURCE_ALPHA) {
+		return &pair_inst->Alpha.Src[arg->Source];
+	} else {
+		return NULL; /* neither source flag is set for this swizzle */
+	}
+}
+
+int rc_pair_get_src_index(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_source * src)
+{
+	int i;
+	for (i = 0; i < 3; i++) { /* the presubtract slot (index 3) is not searched */
+		if (&pair_inst->RGB.Src[i] == src
+			|| &pair_inst->Alpha.Src[i] == src) {
+			return i;
+		}
+	}
+	return -1;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
new file mode 100644
index 00000000000..a957ea9f7a0
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_PAIR_H_
+#define __RADEON_PROGRAM_PAIR_H_
+
+#include "radeon_code.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+
+
+/**
+ * \file
+ * Represents a paired ALU instruction, as found in R300 and R500
+ * fragment programs.
+ *
+ * Note that this representation is taking some liberties as far
+ * as register files are concerned, to allow separate register
+ * allocation.
+ *
+ * Also note that there are some subtleties in that the semantics
+ * of certain opcodes are implicitly changed in this representation;
+ * see \ref rc_pair_translate
+ */
+
+/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then
+ * the presubtract value will be used, and
+ * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
+ */
+#define RC_PAIR_PRESUB_SRC 3
+
+struct rc_pair_instruction_source {
+	unsigned int Used:1; /* non-zero once a register has been installed in this slot */
+	unsigned int File:3; /* holds an rc_register_file value */
+	unsigned int Index:RC_REGISTER_INDEX_BITS; /* register index; for the presubtract slot, the presubtract op */
+};
+
+struct rc_pair_instruction_arg {
+	unsigned int Source:2; /* index into Src[]; RC_PAIR_PRESUB_SRC selects the presubtract value */
+	unsigned int Swizzle:12; /* four 3-bit selectors, laid out as by RC_MAKE_SWIZZLE */
+	unsigned int Abs:1;
+	unsigned int Negate:1;
+};
+
+struct rc_pair_sub_instruction {
+	unsigned int Opcode:8;
+	unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+	unsigned int WriteMask:4;
+	unsigned int Target:2;
+	unsigned int OutputWriteMask:3;
+	unsigned int DepthWriteMask:1;
+	unsigned int Saturate:1;
+
+	struct rc_pair_instruction_source Src[4]; /* slots 0-2 plus the RC_PAIR_PRESUB_SRC slot */
+	struct rc_pair_instruction_arg Arg[3];
+};
+
+struct rc_pair_instruction {
+	struct rc_pair_sub_instruction RGB;
+	struct rc_pair_sub_instruction Alpha;
+
+	unsigned int WriteALUResult:2;
+	unsigned int ALUResultCompare:3;
+	unsigned int Nop:1;
+};
+
+typedef void (*rc_pair_foreach_src_fn)
+			(void *, struct rc_pair_instruction_source *);
+
+/**
+ * General helper functions for dealing with the paired instruction format.
+ */
+/*@{*/
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+	unsigned int rgb, unsigned int alpha,
+	rc_register_file file, unsigned int index);
+
+void rc_pair_foreach_source_that_alpha_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb);
+
+void rc_pair_foreach_source_that_rgb_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb);
+
+struct rc_pair_instruction_source * rc_pair_get_src(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_arg * arg);
+
+int rc_pair_get_src_index(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_source * src);
+/*@}*/
+
+
+/**
+ * Compiler passes that operate with the paired format.
+ */
+/*@{*/
+struct radeon_pair_handler;
+
+void rc_pair_translate(struct radeon_compiler *cc, void *user);
+void rc_pair_schedule(struct radeon_compiler *cc, void *user);
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user);
+void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user);
+void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user);
+/*@}*/
+
+#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c
new file mode 100644
index 00000000000..390d1319460
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2009 Nicolai Hähnle
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+static const char * textarget_to_string(rc_texture_target target)
+{
+	switch(target) {
+	case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
+	case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
+	case RC_TEXTURE_CUBE: return "CUBE";
+	case RC_TEXTURE_3D: return "3D";
+	case RC_TEXTURE_RECT: return "RECT";
+	case RC_TEXTURE_2D: return "2D";
+	case RC_TEXTURE_1D: return "1D";
+	default: return "BAD_TEXTURE_TARGET";
+	}
+}
+
+static const char * presubtract_op_to_string(rc_presubtract_op op)
+{
+	switch(op) {
+	case RC_PRESUB_NONE:
+		return "NONE";
+	case RC_PRESUB_BIAS:
+		return "(1 - 2 * src0)";
+	case RC_PRESUB_SUB:
+		return "(src1 - src0)";
+	case RC_PRESUB_ADD:
+		return "(src1 + src0)";
+	case RC_PRESUB_INV:
+		return "(1 - src0)";
+	default:
+		return "BAD_PRESUBTRACT_OP";
+	}
+}
+
+static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
+{
+	if (func == RC_COMPARE_FUNC_NEVER) {
+		fprintf(f, "false");
+	} else if (func == RC_COMPARE_FUNC_ALWAYS) {
+		fprintf(f, "true");
+	} else {
+		const char * op;
+		switch(func) {
+		case RC_COMPARE_FUNC_LESS: op = "<"; break;
+		case RC_COMPARE_FUNC_EQUAL: op = "=="; break;
+		case RC_COMPARE_FUNC_LEQUAL: op = "<="; break;
+		case RC_COMPARE_FUNC_GREATER: op = ">"; break;
+		case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break;
+		case RC_COMPARE_FUNC_GEQUAL: op = ">="; break;
+		default: op = "???"; break;
+		}
+		fprintf(f, "%s %s %s", lhs, op, rhs);
+	}
+}
+
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+	if (file == RC_FILE_NONE) {
+		fprintf(f, "none");
+	} else if (file == RC_FILE_SPECIAL) {
+		switch(index) {
+		case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
+		default: fprintf(f, "special[%i]", index); break;
+		}
+	} else {
+		const char * filename;
+		switch(file) {
+		case RC_FILE_TEMPORARY: filename = "temp"; break;
+		case RC_FILE_INPUT: filename = "input"; break;
+		case RC_FILE_OUTPUT: filename = "output"; break;
+		case RC_FILE_ADDRESS: filename = "addr"; break;
+		case RC_FILE_CONSTANT: filename = "const"; break;
+		default: filename = "BAD FILE"; break;
+		}
+		fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
+	}
+}
+
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+	if (mask & RC_MASK_X) fprintf(f, "x");
+	if (mask & RC_MASK_Y) fprintf(f, "y");
+	if (mask & RC_MASK_Z) fprintf(f, "z");
+	if (mask & RC_MASK_W) fprintf(f, "w");
+}
+
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+	rc_print_register(f, dst.File, dst.Index, 0);
+	if (dst.WriteMask != RC_MASK_XYZW) { /* a full write mask is left implicit */
+		fprintf(f, ".");
+		rc_print_mask(f, dst.WriteMask);
+	}
+}
+
+static char rc_swizzle_char(unsigned int swz)
+{
+	switch(swz) {
+	case RC_SWIZZLE_X: return 'x';
+	case RC_SWIZZLE_Y: return 'y';
+	case RC_SWIZZLE_Z: return 'z';
+	case RC_SWIZZLE_W: return 'w';
+	case RC_SWIZZLE_ZERO: return '0';
+	case RC_SWIZZLE_ONE: return '1';
+	case RC_SWIZZLE_HALF: return 'H';
+	case RC_SWIZZLE_UNUSED: return '_';
+	}
+	fprintf(stderr, "bad swz: %u\n", swz);
+	return '?';
+}
+
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+	unsigned int comp;
+	for(comp = 0; comp < 4; ++comp) {
+		rc_swizzle swz = GET_SWZ(swizzle, comp);
+		if (GET_BIT(negate, comp))
+			fprintf(f, "-");
+		fprintf(f, "%c", rc_swizzle_char(swz));
+	}
+}
+
+static void rc_print_presub_instruction(FILE * f,
+					struct rc_presub_instruction inst)
+{
+	fprintf(f,"(");
+	switch(inst.Opcode){
+	case RC_PRESUB_BIAS:
+		fprintf(f, "1 - 2 * ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_SUB:
+		rc_print_register(f, inst.SrcReg[1].File,
+				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+		fprintf(f, " - ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_ADD:
+		rc_print_register(f, inst.SrcReg[1].File,
+				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+		fprintf(f, " + ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_INV:
+		fprintf(f, "1 - ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	default:
+		break;
+	}
+	fprintf(f, ")");
+}
+
+static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
+						struct rc_src_register src)
+{
+	int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); /* all-or-nothing negate: one leading '-' suffices */
+
+	if (src.Negate == RC_MASK_XYZW)
+		fprintf(f, "-");
+	if (src.Abs)
+		fprintf(f, "|");
+
+	if(src.File == RC_FILE_PRESUB)
+		rc_print_presub_instruction(f, inst->U.I.PreSub);
+	else
+		rc_print_register(f, src.File, src.Index, src.RelAddr);
+
+	if (src.Abs && !trivial_negate)
+		fprintf(f, "|"); /* close |...| before the per-component negate signs */
+
+	if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
+		fprintf(f, ".");
+		rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
+	}
+
+	if (src.Abs && trivial_negate)
+		fprintf(f, "|"); /* otherwise close |...| after the swizzle */
+}
+
+static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth)
+{
+	switch (opcode) {
+	case RC_OPCODE_IF:
+	case RC_OPCODE_BGNLOOP:
+		return (*branch_depth)++ * 2; /* opener prints at the old depth; what follows is one level deeper */
+
+	case RC_OPCODE_ENDIF:
+	case RC_OPCODE_ENDLOOP:
+		assert(*branch_depth > 0);
+		return --(*branch_depth) * 2; /* closer prints at the depth of its opener */
+
+	case RC_OPCODE_ELSE:
+		assert(*branch_depth > 0);
+		return (*branch_depth - 1) * 2; /* one level out, depth itself unchanged */
+
+	default:
+		return *branch_depth * 2;
+	}
+}
+
+static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned int reg;
+	unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth);
+
+	for (unsigned i = 0; i < spaces; i++)
+		fprintf(f, " ");
+
+	fprintf(f, "%s", opcode->Name);
+
+	switch(inst->U.I.SaturateMode) {
+	case RC_SATURATE_NONE: break;
+	case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
+	case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
+	default: fprintf(f, "_BAD_SAT"); break;
+	}
+
+	if (opcode->HasDstReg) {
+		fprintf(f, " ");
+		rc_print_dst_register(f, inst->U.I.DstReg);
+		if (opcode->NumSrcRegs)
+			fprintf(f, ",");
+	}
+
+	for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+		if (reg > 0)
+			fprintf(f, ",");
+		fprintf(f, " ");
+		rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]);
+	}
+
+	if (opcode->HasTexture) {
+		fprintf(f, ", %s%s[%u]",
+			textarget_to_string(inst->U.I.TexSrcTarget),
+			inst->U.I.TexShadow ? "SHADOW" : "",
+			inst->U.I.TexSrcUnit);
+	}
+
+	fprintf(f, ";");
+
+	if (inst->U.I.WriteALUResult) {
+		fprintf(f, " [aluresult = (");
+		rc_print_comparefunc(f,
+			(inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w",
+			inst->U.I.ALUResultCompare, "0");
+		fprintf(f, ")]");
+	}
+
+	fprintf(f, "\n");
+}
+
+static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+	int printedsrc = 0;
+	unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ?
+					      inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth);
+
+	for (unsigned i = 0; i < spaces; i++)
+		fprintf(f, " ");
+
+	for(unsigned int src = 0; src < 3; ++src) {
+		if (inst->RGB.Src[src].Used) {
+			if (printedsrc)
+				fprintf(f, ", ");
+			fprintf(f, "src%u.xyz = ", src);
+			rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
+			printedsrc = 1;
+		}
+		if (inst->Alpha.Src[src].Used) {
+			if (printedsrc)
+				fprintf(f, ", ");
+			fprintf(f, "src%u.w = ", src);
+			rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
+			printedsrc = 1;
+		}
+	}
+	if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+		fprintf(f, ", srcp.xyz = %s",
+			presubtract_op_to_string(
+					inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
+	}
+	if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+		fprintf(f, ", srcp.w = %s",
+			presubtract_op_to_string(
+					inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
+	}
+	fprintf(f, "\n");
+
+	if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+		for (unsigned i = 0; i < spaces; i++)
+			fprintf(f, " ");
+
+		fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
+		if (inst->RGB.WriteMask)
+			fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
+				(inst->RGB.WriteMask & 1) ? "x" : "",
+				(inst->RGB.WriteMask & 2) ? "y" : "",
+				(inst->RGB.WriteMask & 4) ? "z" : "");
+		if (inst->RGB.OutputWriteMask)
+			fprintf(f, " color[%i].%s%s%s", inst->RGB.Target,
+				(inst->RGB.OutputWriteMask & 1) ? "x" : "",
+				(inst->RGB.OutputWriteMask & 2) ? "y" : "",
+				(inst->RGB.OutputWriteMask & 4) ? "z" : "");
+		if (inst->WriteALUResult == RC_ALURESULT_X)
+			fprintf(f, " aluresult");
+
+		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+			const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
+			const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
+			fprintf(f, ", %s%ssrc", neg, abs);
+			if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+				fprintf(f,"p");
+			else
+				fprintf(f,"%d", inst->RGB.Arg[arg].Source);
+			fprintf(f,".%c%c%c%s",
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
+				abs);
+		}
+		fprintf(f, "\n");
+	}
+
+	if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+		for (unsigned i = 0; i < spaces; i++)
+			fprintf(f, " ");
+
+		fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
+		if (inst->Alpha.WriteMask)
+			fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
+		if (inst->Alpha.OutputWriteMask)
+			fprintf(f, " color[%i].w", inst->Alpha.Target);
+		if (inst->Alpha.DepthWriteMask)
+			fprintf(f, " depth.w");
+		if (inst->WriteALUResult == RC_ALURESULT_W)
+			fprintf(f, " aluresult");
+
+		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+			const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
+			const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
+			fprintf(f, ", %s%ssrc", neg, abs);
+			if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+				fprintf(f,"p");
+			else
+				fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
+			fprintf(f,".%c%s",
+				rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs);
+		}
+		fprintf(f, "\n");
+	}
+
+	if (inst->WriteALUResult) {
+		for (unsigned i = 0; i < spaces; i++)
+			fprintf(f, " ");
+
+		fprintf(f, " [aluresult = (");
+		rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
+		fprintf(f, ")]\n");
+	}
+}
+
+/**
+ * Print program to stderr, one numbered instruction at a time.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+	unsigned int linenum = 0;
+	unsigned branch_depth = 0;
+	struct rc_instruction *inst;
+
+	fprintf(stderr, "# Radeon Compiler Program\n");
+
+	for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+		fprintf(stderr, "%3u: ", linenum);
+
+		if (inst->Type == RC_INSTRUCTION_PAIR)
+			rc_print_pair_instruction(stderr, inst, &branch_depth);
+		else
+			rc_print_normal_instruction(stderr, inst, &branch_depth);
+
+		linenum++;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.c b/src/gallium/drivers/r300/compiler/radeon_program_tex.c
new file mode 100644
index 00000000000..8d16b2cf9ec
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.c
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ * Copyright (C) 2010 Marek Olšák
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_tex.h" + +#include "radeon_compiler_util.h" + +/* Series of transformations to be done on textures. */ + +static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, }; + + if (compiler->enable_shadow_ambient) { + reg.File = RC_FILE_CONSTANT; + reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, + RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = RC_SWIZZLE_WWWW; + } else { + reg.File = RC_FILE_NONE; + reg.Swizzle = RC_SWIZZLE_0000; + } + + reg.Swizzle = combine_swizzles(reg.Swizzle, + compiler->state.unit[tmu].texture_swizzle); + return reg; +} + +static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, }; + + reg.File = RC_FILE_NONE; + reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, + compiler->state.unit[tmu].texture_swizzle); + return reg; +} + +static void scale_texcoords(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst, + unsigned state_constant) +{ + struct rc_instruction *inst_mov; + + unsigned temp = rc_find_free_temporary(&compiler->Base); + + inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MUL; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mov->U.I.SrcReg[1].Index = + rc_constants_add_state(&compiler->Base.Program.Constants, + state_constant, inst->U.I.TexSrcUnit); + + reset_srcreg(&inst->U.I.SrcReg[0]); + 
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; +} + +static void projective_divide(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst) +{ + struct rc_instruction *inst_mul, *inst_rcp; + + unsigned temp = rc_find_free_temporary(&compiler->Base); + + inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = temp; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + /* Because the input can be arbitrarily swizzled, + * read the component mapped to W. */ + inst_rcp->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); + + inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = temp; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.Opcode = RC_OPCODE_TEX; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following ways: + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; + int is_rect = 
inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || + compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords; + + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_TXD && + inst->U.I.Opcode != RC_OPCODE_TXL && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || + (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; + + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + } else { + inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); + } + + return 1; + } else { + struct rc_instruction * inst_rcp = NULL; + struct rc_instruction *inst_mul, *inst_add, *inst_cmp; + unsigned tmp_texsample; + unsigned tmp_sum; + int pass, fail; + + /* Save the output register. */ + struct rc_dst_register output_reg = inst->U.I.DstReg; + unsigned saturate_mode = inst->U.I.SaturateMode; + + /* Redirect TEX to a new temp. */ + tmp_texsample = rc_find_free_temporary(c); + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tmp_texsample; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + tmp_sum = rc_find_free_temporary(c); + + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + /* Compute 1/W. 
*/ + inst_rcp = rc_insert_new_instruction(c, inst); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tmp_sum; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); + } + + /* Divide Z by W (if it's TXP) and saturate. */ + inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); + inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tmp_sum; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tmp_sum; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + } + + /* Add the depth texture value. 
*/ + inst_add = rc_insert_new_instruction(c, inst_mul); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tmp_sum; + inst_add->U.I.DstReg.WriteMask = RC_MASK_W; + inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[0].Index = tmp_sum; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = tmp_texsample; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + + /* Note that SrcReg[0] is r, SrcReg[1] is tex and: + * LESS: r < tex <=> -tex+r < 0 + * GEQUAL: r >= tex <=> not (-tex+r < 0) + * GREATER: r > tex <=> tex-r < 0 + * LEQUAL: r <= tex <=> not ( tex-r < 0) + * EQUAL: GEQUAL + * NOTEQUAL:LESS + */ + + /* This negates either r or tex: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || + comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) + inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; + else + inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; + + /* This negates the whole expression: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || + comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp = rc_insert_new_instruction(c, inst_add); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.SaturateMode = saturate_mode; + inst_cmp->U.I.DstReg = output_reg; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = tmp_sum; + inst_cmp->U.I.SrcReg[0].Swizzle = + combine_swizzles(RC_SWIZZLE_WWWW, + compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); + inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); + + assert(tmp_texsample != 
tmp_sum); + } + } + + /* R300 cannot sample from rectangles and the wrap mode fallback needs + * normalized coordinates anyway. */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) { + scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR); + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + } + + /* Divide by W if needed. */ + if (inst->U.I.Opcode == RC_OPCODE_TXP && + (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT || + compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) { + projective_divide(compiler, inst); + } + + /* Texture wrap modes don't work on NPOT textures. + * + * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and + * mirroring are not. If we need to repeat, we do: + * + * MUL temp, texcoord, + * FRC temp, temp ; Discard integer portion of coords + * + * This gives us coords in [0, 1]. + * + * Mirroring is trickier. We're going to start out like repeat: + * + * MUL temp, texcoord, ; De-mirror across axes + * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] + * ; so scale to [0, 1] + * FRC temp, temp ; Make the pattern repeat + * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] + * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. + * ; The pattern is backwards, so reverse it (1-x). + * + * This gives us coords in [0, 1]. + * + * ~ C & M. ;) + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + wrapmode != RC_WRAP_NONE) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + if (wrapmode == RC_WRAP_REPEAT) { + /* Both instructions will be paired up. 
*/ + struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { + /* + * Function: + * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) + * + * Code: + * MUL temp, src0, 0.5 + * FRC temp, temp + * MAD temp, temp, 2, -1 + * ADD temp, 1, -abs(temp) + */ + + struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; + unsigned two, two_swizzle; + + inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; + + inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_frc->U.I.SrcReg[0].Index = temp; + inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); + inst_mad = rc_insert_new_instruction(c, inst->Prev); + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = temp; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = temp; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = two; + inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; 
+ inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; + + inst_add = rc_insert_new_instruction(c, inst->Prev); + + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = temp; + inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = temp; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + inst_add->U.I.SrcReg[1].Abs = 1; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; + } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { + /* + * Mirrored clamp modes are bloody simple, we just use abs + * to mirror [0, 1] into [-1, 0]. This works for + * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. + */ + struct rc_instruction *inst_mov; + + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Abs = 1; + } + + /* Preserve W for TXP/TXB. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + } + + /* NPOT -> POT conversion for 3D textures. */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + /* Saturate XYZ. 
*/ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + /* Copy W. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + + scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); + } + + /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM. + * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2 + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) { + unsigned two, two_swizzle; + struct rc_instruction *inst_mul, *inst_mad, *inst_cnd; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); + + inst_mul = rc_insert_new_instruction(c, inst); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */ + inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */ + inst_mul->U.I.SrcReg[1].Index = two; + inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle; + + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ + 
inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */ + inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */ + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW; + + inst_cnd = rc_insert_new_instruction(c, inst_mad); + inst_cnd->U.I.Opcode = RC_OPCODE_CND; + inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode; + inst_cnd->U.I.DstReg = inst->U.I.DstReg; + inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; + inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; + inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index; + inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; + inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ + + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot write texture to output registers or with saturate (all chips), + * or with masks (non-r500). 
*/ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst->U.I.SaturateMode || + (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot read texture coordinate from constants file */ + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + } + + return 1; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.h b/src/gallium/drivers/r300/compiler/radeon_program_tex.h new file mode 100644 index 00000000000..a0105051ac4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_TEX_H_ +#define __RADEON_PROGRAM_TEX_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); + +#endif /* __RADEON_PROGRAM_TEX_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c new file mode 100644 index 00000000000..7d76585a593 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2010 Marek Olšák + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_remove_constants.h" +#include "radeon_dataflow.h" + +struct mark_used_data { + unsigned char * const_used; + unsigned * has_rel_addr; +}; + +static void remap_regs(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + unsigned *inv_remap_table = userdata; + + if (*pfile == RC_FILE_CONSTANT) { + *pindex = inv_remap_table[*pindex]; + } +} + +static void mark_used(void * userdata, struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct mark_used_data * d = userdata; + + if (src->File == RC_FILE_CONSTANT) { + if (src->RelAddr) { + *d->has_rel_addr = 1; + } else { + d->const_used[src->Index] = 1; + } + } +} + +void rc_remove_unused_constants(struct radeon_compiler *c, void *user) +{ + unsigned **out_remap_table = (unsigned**)user; + unsigned char *const_used; + unsigned *remap_table; + unsigned *inv_remap_table; + unsigned has_rel_addr = 0; + unsigned is_identity = 1; + unsigned are_externals_remapped = 0; + struct rc_constant *constants = c->Program.Constants.Constants; + struct mark_used_data d; + unsigned new_count; + + if (!c->Program.Constants.Count) { + *out_remap_table = NULL; + return; + } + + const_used = malloc(c->Program.Constants.Count); + memset(const_used, 0, c->Program.Constants.Count); + + d.const_used = const_used; + d.has_rel_addr = &has_rel_addr; + + /* Pass 1: Mark used constants. */ + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + rc_for_all_reads_src(inst, mark_used, &d); + } + + /* Pass 2: If there is relative addressing or dead constant elimination + * is disabled, mark all externals as used. */ + if (has_rel_addr || !c->remove_unused_constants) { + for (unsigned i = 0; i < c->Program.Constants.Count; i++) + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + const_used[i] = 1; + } + + /* Pass 3: Make the remapping table and remap constants. 
+ * This pass removes unused constants simply by overwriting them by other constants. */ + remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + new_count = 0; + + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (const_used[i]) { + remap_table[new_count] = i; + inv_remap_table[i] = new_count; + + if (i != new_count) { + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + are_externals_remapped = 1; + + constants[new_count] = constants[i]; + is_identity = 0; + } + new_count++; + } + } + + /* is_identity ==> new_count == old_count + * !is_identity ==> new_count < old_count */ + assert( is_identity || new_count < c->Program.Constants.Count); + assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped)); + + /* Pass 4: Redirect reads of all constants to their new locations. */ + if (!is_identity) { + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + rc_remap_registers(inst, remap_regs, inv_remap_table); + } + } + + /* Set the new constant count. Note that new_count may be less than + * Count even though the remapping function is identity. In that case, + * the constants have been removed at the end of the array. */ + c->Program.Constants.Count = new_count; + + if (are_externals_remapped) { + *out_remap_table = remap_table; + } else { + *out_remap_table = NULL; + free(remap_table); + } + + free(const_used); + free(inv_remap_table); + + if (c->Debug & RC_DBG_LOG) + rc_constants_print(&c->Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.h b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h new file mode 100644 index 00000000000..f29113b922b --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2010 Marek Olšák + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_REMOVE_CONSTANTS_H +#define RADEON_REMOVE_CONSTANTS_H + +#include "radeon_compiler.h" + +void rc_remove_unused_constants(struct radeon_compiler *c, void *user); + +#endif diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.c b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c new file mode 100644 index 00000000000..cafa0579734 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c @@ -0,0 +1,92 @@ +/* + * Copyright 2010 Tom Stellard + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_rename_regs.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" +#include "radeon_program.h" + +/** + * This function renames registers in an attempt to get the code close to + * SSA form. After this function has completed, most of the register are only + * written to one time, with a few exceptions. + * + * This function assumes all the instructions are still of type + * RC_INSTRUCTION_NORMAL. + */ +void rc_rename_regs(struct radeon_compiler *c, void *user) +{ + unsigned int i, used_length; + int new_index; + struct rc_instruction * inst; + struct rc_reader_data reader_data; + unsigned char * used; + + /* XXX Remove this once the register allocation works with flow control. 
*/ + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) + return; + } + + used_length = 2 * rc_recompute_ips(c); + used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); + memset(used, 0, sizeof(unsigned char) * used_length); + + rc_get_used_temporaries(c, used, used_length); + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + continue; + + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + continue; + + new_index = rc_find_free_temporary_list(c, used, used_length, + RC_MASK_XYZW); + if (new_index < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return; + } + + reader_data.Writer->U.I.DstReg.Index = new_index; + for(i = 0; i < reader_data.ReaderCount; i++) { + reader_data.Readers[i].U.I.Src->Index = new_index; + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.h b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h new file mode 100644 index 00000000000..3baf29f6120 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h @@ -0,0 +1,9 @@ + +#ifndef RADEON_RENAME_REGS_H +#define RADEON_RENAME_REGS_H + +struct radeon_compiler; + +void rc_rename_regs(struct radeon_compiler *c, void *user); + +#endif /* RADEON_RENAME_REGS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_swizzle.h b/src/gallium/drivers/r300/compiler/radeon_swizzle.h new file mode 100644 index 00000000000..c81d5f7a5e9 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_swizzle.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_SWIZZLE_H +#define RADEON_SWIZZLE_H + +#include "radeon_program.h" + +struct rc_swizzle_split { + unsigned char NumPhases; + unsigned char Phase[4]; +}; + +/** + * Describe the swizzling capability of target hardware. + */ +struct rc_swizzle_caps { + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + * + * \return 1 if the swizzle is native for the given opcode + */ + int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); + + /** + * Determine how to split access to the masked channels of the + * given source register to obtain ALU-native swizzles. 
+ */ + void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); +}; + +#endif /* RADEON_SWIZZLE_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c b/src/gallium/drivers/r300/compiler/radeon_variable.c new file mode 100644 index 00000000000..938fb8421f2 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_variable.c @@ -0,0 +1,517 @@ +/* + * Copyright 2011 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_variable.h" + +#include "memory_pool.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_opcodes.h" +#include "radeon_program.h" + +/** + * Rewrite the index and writemask for the destination register of var + * and its friends to new_index and new_writemask. 
This function also takes + * care of rewriting the swizzles for the sources of var. + */ +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask) +{ + struct rc_variable * var_ptr; + struct rc_list * readers; + unsigned int old_mask = rc_variable_writemask_sum(var); + unsigned int conversion_swizzle = + rc_make_conversion_swizzle(old_mask, new_writemask); + + for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) { + if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { + rc_normal_rewrite_writemask(var_ptr->Inst, + conversion_swizzle); + var_ptr->Inst->U.I.DstReg.Index = new_index; + } else { + struct rc_pair_sub_instruction * sub; + if (var_ptr->Dst.WriteMask == RC_MASK_W) { + assert(new_writemask & RC_MASK_W); + sub = &var_ptr->Inst->U.P.Alpha; + } else { + sub = &var_ptr->Inst->U.P.RGB; + rc_pair_rewrite_writemask(sub, + conversion_swizzle); + } + sub->DestIndex = new_index; + } + } + + readers = rc_variable_readers_union(var); + + for ( ; readers; readers = readers->Next) { + struct rc_reader * reader = readers->Item; + if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) { + reader->U.I.Src->Index = new_index; + reader->U.I.Src->Swizzle = rc_rewrite_swizzle( + reader->U.I.Src->Swizzle, conversion_swizzle); + } else { + struct rc_pair_instruction * pair_inst = + &reader->Inst->U.P; + unsigned int src_type = rc_source_type_swz( + reader->U.P.Arg->Swizzle); + + int src_index = reader->U.P.Arg->Source; + if (src_index == RC_PAIR_PRESUB_SRC) { + src_index = rc_pair_get_src_index( + pair_inst, reader->U.P.Src); + } + /* Try to delete the old src, it is OK if this fails, + * because rc_pair_alloc_source might be able to + * find a source that can be reused. + */ + if (rc_pair_remove_src(reader->Inst, src_type, + src_index, old_mask)) { + /* Reuse the source index of the source that + * was just deleted and set its register + * index. 
We can't use rc_pair_alloc_source + * for this because it might return a source + * index that is already being used. */ + if (src_type & RC_SOURCE_RGB) { + pair_inst->RGB.Src[src_index] + .Used = 1; + pair_inst->RGB.Src[src_index] + .Index = new_index; + pair_inst->RGB.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + if (src_type & RC_SOURCE_ALPHA) { + pair_inst->Alpha.Src[src_index] + .Used = 1; + pair_inst->Alpha.Src[src_index] + .Index = new_index; + pair_inst->Alpha.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + } else { + src_index = rc_pair_alloc_source( + &reader->Inst->U.P, + src_type & RC_SOURCE_RGB, + src_type & RC_SOURCE_ALPHA, + RC_FILE_TEMPORARY, + new_index); + if (src_index < 0) { + rc_error(var->C, "Rewrite of inst %u failed " + "Can't allocate source for " + "Inst %u src_type=%x " + "new_index=%u new_mask=%u\n", + var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask); + continue; + } + } + reader->U.P.Arg->Swizzle = rc_rewrite_swizzle( + reader->U.P.Arg->Swizzle, conversion_swizzle); + if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) { + reader->U.P.Arg->Source = src_index; + } + } + } +} + +/** + * Compute the live intervals for var and its friends. + */ +void rc_variable_compute_live_intervals(struct rc_variable * var) +{ + while(var) { + unsigned int i; + unsigned int start = var->Inst->IP; + + for (i = 0; i < var->ReaderCount; i++) { + unsigned int chan; + unsigned int chan_start = start; + unsigned int chan_end = var->Readers[i].Inst->IP; + unsigned int mask = var->Readers[i].WriteMask; + struct rc_instruction * inst; + + /* Extend the live interval of T0 to the start of the + * loop for sequences like: + * BGNLOOP + * read T0 + * ... 
+ * write T0 + * ENDLOOP + */ + if (var->Readers[i].Inst->IP < start) { + struct rc_instruction * bgnloop = + rc_match_endloop(var->Readers[i].Inst); + chan_start = bgnloop->IP; + } + + /* Extend the live interval of T0 to the start of the + * loop in case there is a BRK instruction in the loop + * (we don't actually check for a BRK instruction we + * assume there is one somewhere in the loop, which + * there usually is) for sequences like: + * BGNLOOP + * ... + * conditional BRK + * ... + * write T0 + * ENDLOOP + * read T0 + *************************************************** + * Extend the live interval of T0 to the end of the + * loop for sequences like: + * write T0 + * BGNLOOP + * ... + * read T0 + * ENDLOOP + */ + for (inst = var->Inst; inst != var->Readers[i].Inst; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + struct rc_instruction * bgnloop = + rc_match_endloop(inst); + if (bgnloop->IP < chan_start) { + chan_start = bgnloop->IP; + } + } else if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction * endloop = + rc_match_bgnloop(inst); + if (endloop->IP > chan_end) { + chan_end = endloop->IP; + } + } + } + + for (chan = 0; chan < 4; chan++) { + if ((mask >> chan) & 0x1) { + if (!var->Live[chan].Used + || chan_start < var->Live[chan].Start) { + var->Live[chan].Start = + chan_start; + } + if (!var->Live[chan].Used + || chan_end > var->Live[chan].End) { + var->Live[chan].End = chan_end; + } + var->Live[chan].Used = 1; + } + } + } + var = var->Friend; + } +} + +/** + * @return 1 if a and b share a reader + * @return 0 if they do not + */ +static unsigned int readers_intersect( + struct rc_variable * a, + struct rc_variable * b) +{ + unsigned int a_index, b_index; + for (a_index = 0; a_index < a->ReaderCount; a_index++) { + struct rc_reader reader_a = a->Readers[a_index]; + for (b_index = 0; b_index < b->ReaderCount; b_index++) { + struct rc_reader reader_b = b->Readers[b_index]; + if (reader_a.Inst->Type 
== RC_INSTRUCTION_NORMAL + && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL + && reader_a.U.I.Src == reader_b.U.I.Src) { + + return 1; + } + if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR + && reader_b.Inst->Type == RC_INSTRUCTION_PAIR + && reader_a.U.P.Src == reader_b.U.P.Src) { + + return 1; + } + } + } + return 0; +} + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend) +{ + assert(var->Dst.Index == friend->Dst.Index); + while(var->Friend) { + var = var->Friend; + } + var->Friend = friend; +} + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data) +{ + struct rc_variable * new = + memory_pool_malloc(&c->Pool, sizeof(struct rc_variable)); + memset(new, 0, sizeof(struct rc_variable)); + new->C = c; + new->Dst.File = DstFile; + new->Dst.Index = DstIndex; + new->Dst.WriteMask = DstWriteMask; + if (reader_data) { + new->Inst = reader_data->Writer; + new->ReaderCount = reader_data->ReaderCount; + new->Readers = reader_data->Readers; + } + return new; +} + +static void get_variable_helper( + struct rc_list ** variable_list, + struct rc_variable * variable) +{ + struct rc_list * list_ptr; + for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) { + if (readers_intersect(variable, list_ptr->Item)) { + rc_variable_add_friend(list_ptr->Item, variable); + return; + } + } + rc_list_add(variable_list, rc_list(&variable->C->Pool, variable)); +} + +static void get_variable_pair_helper( + struct rc_list ** variable_list, + struct radeon_compiler * c, + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub_inst) +{ + struct rc_reader_data reader_data; + struct rc_variable * new_var; + rc_register_file file; + unsigned int writemask; + + if (sub_inst->Opcode == RC_OPCODE_NOP) { + return; + } + memset(&reader_data, 0, sizeof(struct rc_reader_data)); + rc_get_readers_sub(c, inst, sub_inst, 
&reader_data, NULL, NULL, NULL); + + if (reader_data.ReaderCount == 0) { + return; + } + + if (sub_inst->WriteMask) { + file = RC_FILE_TEMPORARY; + writemask = sub_inst->WriteMask; + } else if (sub_inst->OutputWriteMask) { + file = RC_FILE_OUTPUT; + writemask = sub_inst->OutputWriteMask; + } else { + writemask = 0; + file = RC_FILE_NONE; + } + new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, + &reader_data); + get_variable_helper(variable_list, new_var); +} + +/** + * Generate a list of variables used by the shader program. Each instruction + * that writes to a register is considered a variable. The struct rc_variable + * data structure includes a list of readers and is essentially a + * definition-use chain. Any two variables that share a reader are considered + * "friends" and they are linked together via the Friend attribute. + */ +struct rc_list * rc_get_variables(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + struct rc_list * variable_list = NULL; + + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + struct rc_reader_data reader_data; + struct rc_variable * new_var; + memset(&reader_data, 0, sizeof(reader_data)); + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + if (reader_data.ReaderCount == 0) { + continue; + } + new_var = rc_variable(c, inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask, &reader_data); + get_variable_helper(&variable_list, new_var); + } else { + get_variable_pair_helper(&variable_list, c, inst, + &inst->U.P.RGB); + get_variable_pair_helper(&variable_list, c, inst, + &inst->U.P.Alpha); + } + } + + return variable_list; +} + +/** + * @return The bitwise or of the writemasks of a variable and all of its + * friends. 
+ */ +unsigned int rc_variable_writemask_sum(struct rc_variable * var) +{ + unsigned int writemask = 0; + while(var) { + writemask |= var->Dst.WriteMask; + var = var->Friend; + } + return writemask; +} + +/* + * @return A list of readers for a variable and its friends. Readers + * that read from two different variable friends are only included once in + * this list. + */ +struct rc_list * rc_variable_readers_union(struct rc_variable * var) +{ + struct rc_list * list = NULL; + while (var) { + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + struct rc_list * temp; + struct rc_reader * a = &var->Readers[i]; + unsigned int match = 0; + for (temp = list; temp; temp = temp->Next) { + struct rc_reader * b = temp->Item; + if (a->Inst->Type != b->Inst->Type) { + continue; + } + if (a->Inst->Type == RC_INSTRUCTION_NORMAL) { + if (a->U.I.Src == b->U.I.Src) { + match = 1; + break; + } + } + if (a->Inst->Type == RC_INSTRUCTION_PAIR) { + if (a->U.P.Arg == b->U.P.Arg + && a->U.P.Src == b->U.P.Src) { + match = 1; + break; + } + } + } + if (match) { + continue; + } + rc_list_add(&list, rc_list(&var->C->Pool, a)); + } + var = var->Friend; + } + return list; +} + +static unsigned int reader_equals_src( + struct rc_reader reader, + unsigned int src_type, + void * src) +{ + if (reader.Inst->Type != src_type) { + return 0; + } + if (src_type == RC_INSTRUCTION_NORMAL) { + return reader.U.I.Src == src; + } else { + return reader.U.P.Src == src; + } +} + +static unsigned int variable_writes_src( + struct rc_variable * var, + unsigned int src_type, + void * src) +{ + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + if (reader_equals_src(var->Readers[i], src_type, src)) { + return 1; + } + } + return 0; +} + + +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src) +{ + struct rc_list * list_ptr; + struct rc_list * writer_list = NULL; + for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) { + struct 
rc_variable * var = list_ptr->Item; + if (variable_writes_src(var, src_type, src)) { + struct rc_variable * friend; + rc_list_add(&writer_list, rc_list(&var->C->Pool, var)); + for (friend = var->Friend; friend; + friend = friend->Friend) { + if (variable_writes_src(friend, src_type, src)) { + rc_list_add(&writer_list, + rc_list(&var->C->Pool, friend)); + } + } + /* Once we have identified the variable and its + * friends that write this source, we can + * stop searching, because we know none of the + * other variables in the list will write this source. + * If they did they would be friends of var. + */ + break; + } + } + return writer_list; +} + +void rc_variable_print(struct rc_variable * var) +{ + unsigned int i; + while (var) { + fprintf(stderr, "%u: TEMP[%u].%u: ", + var->Inst->IP, var->Dst.Index, var->Dst.WriteMask); + for (i = 0; i < 4; i++) { + fprintf(stderr, "chan %u: start=%u end=%u ", i, + var->Live[i].Start, var->Live[i].End); + } + fprintf(stderr, "%u readers\n", var->ReaderCount); + if (var->Friend) { + fprintf(stderr, "Friend: \n\t"); + } + var = var->Friend; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.h b/src/gallium/drivers/r300/compiler/radeon_variable.h new file mode 100644 index 00000000000..9427bee18a7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_variable.h @@ -0,0 +1,89 @@ +/* + * Copyright 2011 Tom Stellard + * + * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_VARIABLE_H +#define RADEON_VARIABLE_H + +#include "radeon_compiler.h" + +struct radeon_compiler; +struct rc_list; +struct rc_reader_data; +struct rc_readers; + +struct live_intervals { + int Start; + int End; + int Used; +}; + +struct rc_variable { + struct radeon_compiler * C; + struct rc_dst_register Dst; + + struct rc_instruction * Inst; + unsigned int ReaderCount; + struct rc_reader * Readers; + struct live_intervals Live[4]; + + /* A friend is a variable that shares a reader with another variable. 
+ */ + struct rc_variable * Friend; +}; + +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask); + +void rc_variable_compute_live_intervals(struct rc_variable * var); + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend); + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data); + +struct rc_list * rc_get_variables(struct radeon_compiler * c); + +unsigned int rc_variable_writemask_sum(struct rc_variable * var); + +struct rc_list * rc_variable_readers_union(struct rc_variable * var); + +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src); + +void rc_variable_print(struct rc_variable * var); + +#endif /* RADEON_VARIABLE_H */ diff --git a/src/gallium/drivers/r300/compiler/tests/.gitignore b/src/gallium/drivers/r300/compiler/tests/.gitignore new file mode 100644 index 00000000000..85672fed777 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/.gitignore @@ -0,0 +1 @@ +radeon_compiler_util_tests diff --git a/src/gallium/drivers/r300/compiler/tests/Makefile b/src/gallium/drivers/r300/compiler/tests/Makefile new file mode 100644 index 00000000000..6eda34a2c00 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/Makefile @@ -0,0 +1,53 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +CFLAGS += -Wall -Werror + +### Basic defines ### +TESTS = radeon_compiler_util_tests + +TEST_SOURCES := $(TESTS:=.c) + +SHARED_SOURCES = \ + rc_test_helpers.c \ + unit_test.c + +C_SOURCES = $(SHARED_SOURCES) $(TEST_SOURCES) + +INCLUDES = \ + -I. \ + -I.. 
+ +COMPILER_LIB = ../../libr300.a + +##### TARGETS ##### + +default: depend run_tests + +depend: $(C_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $^ 2> /dev/null + +# Remove .o and backup files +clean: + rm -f $(TESTS) depend depend.bak + +$(TESTS): $(TESTS:=.o) $(SHARED_SOURCES:.c=.o) $(COMPILER_LIB) + $(APP_CC) -o $@ $^ + +run_tests: $(TESTS) + @echo "RUNNING TESTS:" + @echo "" + $(foreach test, $^, @./$(test)) + +.PHONY: $(COMPILER_LIB) +$(COMPILER_LIB): + $(MAKE) -C ../.. + +##### RULES ##### +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + + +sinclude depend diff --git a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c new file mode 100644 index 00000000000..a2e3f2ab2e5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c @@ -0,0 +1,76 @@ +#include +#include +#include + +#include "radeon_compiler_util.h" +#include "radeon_program.h" + +#include "rc_test_helpers.h" +#include "unit_test.h" + +static void test_rc_inst_can_use_presub( + struct test_result * result, + int expected, + const char * add_str, + const char * replace_str) +{ + struct rc_instruction add_inst, replace_inst; + int ret; + + test_begin(result); + init_rc_normal_instruction(&add_inst, add_str); + init_rc_normal_instruction(&replace_inst, replace_str); + + ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0, + &replace_inst.U.I.SrcReg[0], + &add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]); + + test_check(result, ret == expected); +} + +static void test_runner_rc_inst_can_use_presub(struct test_result * result) +{ + + /* This tests the case where the source being replace has the same + * register file and register index as another source register in the + * CMP instruction. 
A previous version of this function was ignoring + * all registers that shared the same file and index as the replacement + * register when counting the number of source selects. + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[0].z, temp[6].__x_, const[1].__x_;", + "CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;"); + + + /* Testing a random case that should fail + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[3], temp[1], temp[2];", + "MAD temp[1], temp[0], const[0].xxxx, -temp[3];"); + + /* This tests the case where the arguments of the ADD + * instruction share the same register file and index. Normally, we + * would need only one source select for these two arguments, but since + * they will be part of a presubtract operation we need to use the two + * source selects that the presubtract instruction expects + * (src0 and src1). + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[3].x, temp[0].x___, temp[0].x___;", + "MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;"); +} + +int main(int argc, char ** argv) +{ + struct test tests[] = { + {"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub}, + {NULL, NULL} + }; + run_tests(tests); +} diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c new file mode 100644 index 00000000000..ca4738af54d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c @@ -0,0 +1,380 @@ +#include +#include +#include +#include +#include +#include + +#include "../radeon_compiler_util.h" +#include "../radeon_opcodes.h" +#include "../radeon_program.h" + +#include "rc_test_helpers.h" + +/* This file contains some helper functions for filling out the rc_instruction + * data structures. 
These functions take a string as input based on the format + * output by rc_program_print(). + */ + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +#define REGEX_ERR_BUF_SIZE 50 + +struct match_info { + const char * String; + int Length; +}; + +static int match_length(regmatch_t * matches, int index) +{ + return matches[index].rm_eo - matches[index].rm_so; +} + +static int regex_helper( + const char * regex_str, + const char * search_str, + regmatch_t * matches, + int num_matches) +{ + char err_buf[REGEX_ERR_BUF_SIZE]; + regex_t regex; + int err_code; + unsigned int i; + + err_code = regcomp(&regex, regex_str, REG_EXTENDED); + if (err_code) { + regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE); + fprintf(stderr, "Failed to compile regex: %s\n", err_buf); + return 0; + } + + err_code = regexec(&regex, search_str, num_matches, matches, 0); + DBG("Search string: '%s'\n", search_str); + for (i = 0; i < num_matches; i++) { + DBG("Match %u start = %d end = %d\n", i, + matches[i].rm_so, matches[i].rm_eo); + } + if (err_code) { + regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE); + fprintf(stderr, "Failed to match regex: %s\n", err_buf); + return 0; + } + return 1; +} + +#define REGEX_SRC_MATCHES 6 + +struct src_tokens { + struct match_info Negate; + struct match_info Abs; + struct match_info File; + struct match_info Index; + struct match_info Swizzle; +}; + +/** + * Initialize the source register at index src_index for the instruction based + * on src_str. + * + * NOTE: Warning in init_rc_normal_instruction() applies to this function as + * well. + * + * @param src_str A string that represents the source register. The format for + * this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure + */ +int init_rc_normal_src( + struct rc_instruction * inst, + unsigned int src_index, + const char * src_str) +{ + const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[([[:digit:]])\\](\\.*[[:lower:]-]*)"; + regmatch_t matches[REGEX_SRC_MATCHES]; + struct src_tokens tokens; + struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index]; + unsigned int i; + + /* Execute the regex */ + if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) { + fprintf(stderr, "Failed to execute regex for src register.\n"); + return 0; + } + + /* Create Tokens */ + tokens.Negate.String = src_str + matches[1].rm_so; + tokens.Negate.Length = match_length(matches, 1); + tokens.Abs.String = src_str + matches[2].rm_so; + tokens.Abs.Length = match_length(matches, 2); + tokens.File.String = src_str + matches[3].rm_so; + tokens.File.Length = match_length(matches, 3); + tokens.Index.String = src_str + matches[4].rm_so; + tokens.Index.Length = match_length(matches, 4); + tokens.Swizzle.String = src_str + matches[5].rm_so; + tokens.Swizzle.Length = match_length(matches, 5); + + /* Negate */ + if (tokens.Negate.Length > 0) { + src_reg->Negate = RC_MASK_XYZW; + } + + /* Abs */ + if (tokens.Abs.Length > 0) { + src_reg->Abs = 1; + } + + /* File */ + if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { + src_reg->File = RC_FILE_TEMPORARY; + } else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) { + src_reg->File = RC_FILE_INPUT; + } else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) { + src_reg->File = RC_FILE_CONSTANT; + } else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) { + src_reg->File = RC_FILE_NONE; + } + + /* Index */ + errno = 0; + src_reg->Index = strtol(tokens.Index.String, NULL, 10); + if (errno > 0) { + fprintf(stderr, "Could not convert src register index.\n"); + return 0; + } + + /* Swizzle */ + if (tokens.Swizzle.Length == 0) { + src_reg->Swizzle = RC_SWIZZLE_XYZW; + } 
else { + int str_index = 1; + src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED); + if (tokens.Swizzle.String[0] != '.') { + fprintf(stderr, "First char of swizzle is not valid.\n"); + return 0; + } + for (i = 0; i < 4; i++, str_index++) { + if (tokens.Swizzle.String[str_index] == '-') { + src_reg->Negate |= (1 << i); + str_index++; + } + switch(tokens.Swizzle.String[str_index]) { + case 'x': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X); + break; + case 'y': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y); + break; + case 'z': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z); + break; + case 'w': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W); + break; + case '1': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE); + break; + case '0': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO); + break; + case 'H': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF); + break; + case '_': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED); + break; + default: + fprintf(stderr, "Unknown src register swizzle.\n"); + return 0; + } + } + } + DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n", + src_reg->File, src_reg->Index, src_reg->Swizzle, + src_reg->Negate, src_reg->Abs); + return 1; +} + +#define REGEX_DST_MATCHES 4 + +struct dst_tokens { + struct match_info File; + struct match_info Index; + struct match_info WriteMask; +}; + +/** + * Initialize the destination for the instruction based on dst_str. + * + * NOTE: Warning in init_rc_normal_instruction() applies to this function as + * well. + * + * @param dst_str A string that represents the destination register. The format + * for this string is the same that is output by rc_program_print. 
+ * @return 1 On success, 0 on failure + */ +int init_rc_normal_dst( + struct rc_instruction * inst, + const char * dst_str) +{ + const char * regex_str = "([[:lower:]]*)\\[([[:digit:]]*)\\](\\.*[[:lower:]]*)"; + regmatch_t matches[REGEX_DST_MATCHES]; + struct dst_tokens tokens; + unsigned int i; + + /* Execute the regex */ + if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) { + fprintf(stderr, "Failed to execute regex for dst register.\n"); + return 0; + } + + /* Create Tokens */ + tokens.File.String = dst_str + matches[1].rm_so; + tokens.File.Length = match_length(matches, 1); + tokens.Index.String = dst_str + matches[2].rm_so; + tokens.Index.Length = match_length(matches, 2); + tokens.WriteMask.String = dst_str + matches[3].rm_so; + tokens.WriteMask.Length = match_length(matches, 3); + + /* File Type */ + if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + } else if (!strncmp(tokens.File.String, "output", tokens.File.Length)) { + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + } else { + fprintf(stderr, "Unknown dst register file type.\n"); + return 0; + } + + /* File Index */ + errno = 0; + inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10); + + if (errno > 0) { + fprintf(stderr, "Could not convert dst register index\n"); + return 0; + } + + /* WriteMask */ + if (tokens.WriteMask.Length == 0) { + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } else { + /* The first character should be '.' 
*/ + if (tokens.WriteMask.String[0] != '.') { + fprintf(stderr, "1st char of writemask is not valid.\n"); + return 0; + } + for (i = 1; i < tokens.WriteMask.Length; i++) { + switch(tokens.WriteMask.String[i]) { + case 'x': + inst->U.I.DstReg.WriteMask |= RC_MASK_X; + break; + case 'y': + inst->U.I.DstReg.WriteMask |= RC_MASK_Y; + break; + case 'z': + inst->U.I.DstReg.WriteMask |= RC_MASK_Z; + break; + case 'w': + inst->U.I.DstReg.WriteMask |= RC_MASK_W; + break; + default: + fprintf(stderr, "Unknown swizzle in writemask.\n"); + return 0; + } + } + } + DBG("Dst Reg File=%u Index=%d Writemask=%d\n", + inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask); + return 1; +} + +#define REGEX_INST_MATCHES 7 + +struct inst_tokens { + struct match_info Opcode; + struct match_info Sat; + struct match_info Dst; + struct match_info Srcs[3]; +}; + +/** + * Initialize a normal instruction based on inst_str. + * + * WARNING: This function might not be able to handle every kind of format that + * rc_program_print() can output. If you are having problems with a + * particular string, you may need to add support for it to this function. + * + * @param inst_str A string that represents the instruction. The format for + * this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure + */ +int init_rc_normal_instruction( + struct rc_instruction * inst, + const char * inst_str) +{ + const char * regex_str = "([[:upper:]]+)(_SAT)* ([^,]*)[, ]*([^,]*)[, ]*([^,]*)[, ]*([^;]*)"; + int i; + regmatch_t matches[REGEX_INST_MATCHES]; + struct inst_tokens tokens; + + /* Initialize inst */ + memset(inst, 0, sizeof(struct rc_instruction)); + inst->Type = RC_INSTRUCTION_NORMAL; + + /* Execute the regex */ + if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) { + return 0; + } + memset(&tokens, 0, sizeof(tokens)); + + /* Create Tokens */ + tokens.Opcode.String = inst_str + matches[1].rm_so; + tokens.Opcode.Length = match_length(matches, 1); + if (matches[2].rm_so > -1) { + tokens.Sat.String = inst_str + matches[2].rm_so; + tokens.Sat.Length = match_length(matches, 2); + } + + + /* Fill out the rest of the instruction. */ + for (i = 0; i < MAX_RC_OPCODE; i++) { + const struct rc_opcode_info * info = rc_get_opcode_info(i); + unsigned int first_src = 3; + unsigned int j; + if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) { + continue; + } + inst->U.I.Opcode = info->Opcode; + if (info->HasDstReg) { + char * dst_str; + tokens.Dst.String = inst_str + matches[3].rm_so; + tokens.Dst.Length = match_length(matches, 3); + first_src++; + + dst_str = malloc(sizeof(char) * (tokens.Dst.Length + 1)); + strncpy(dst_str, tokens.Dst.String, tokens.Dst.Length); + dst_str[tokens.Dst.Length] = '\0'; + init_rc_normal_dst(inst, dst_str); + free(dst_str); + } + for (j = 0; j < info->NumSrcRegs; j++) { + char * src_str; + tokens.Srcs[j].String = + inst_str + matches[first_src + j].rm_so; + tokens.Srcs[j].Length = + match_length(matches, first_src + j); + + src_str = malloc(sizeof(char) * + (tokens.Srcs[j].Length + 1)); + strncpy(src_str, tokens.Srcs[j].String, + tokens.Srcs[j].Length); + src_str[tokens.Srcs[j].Length] = '\0'; + init_rc_normal_src(inst, j, src_str); + } + break; + } + return 1; +} diff --git 
a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h new file mode 100644 index 00000000000..1a6bf9699ba --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h @@ -0,0 +1,13 @@ + +int init_rc_normal_src( + struct rc_instruction * inst, + unsigned int src_index, + const char * src_str); + +int init_rc_normal_dst( + struct rc_instruction * inst, + const char * dst_str); + +int init_rc_normal_instruction( + struct rc_instruction * inst, + const char * inst_str); diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.c b/src/gallium/drivers/r300/compiler/tests/unit_test.c new file mode 100644 index 00000000000..266f3365c58 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/unit_test.c @@ -0,0 +1,35 @@ +#include +#include +#include + +#include "unit_test.h" + +void run_tests(struct test tests[]) +{ + int i; + for (i = 0; tests[i].name; i++) { + printf("Test %s\n", tests[i].name); + memset(&tests[i].result, 0, sizeof(tests[i].result)); + tests[i].test_func(&tests[i].result); + printf("Test %s (%d/%d) pass\n", tests[i].name, + tests[i].result.pass, tests[i].result.test_count); + } +} + +void test_begin(struct test_result * result) +{ + result->test_count++; +} + +void test_check(struct test_result * result, int cond) +{ + printf("Subtest %u -> ", result->test_count); + if (cond) { + result->pass++; + printf("Pass"); + } else { + result->fail++; + printf("Fail"); + } + printf("\n"); +} diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.h b/src/gallium/drivers/r300/compiler/tests/unit_test.h new file mode 100644 index 00000000000..441e8b655a5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/unit_test.h @@ -0,0 +1,17 @@ + +struct test_result { + unsigned int test_count; + unsigned int pass; + unsigned int fail; +}; + +struct test { + const char * name; + void (*test_func)(struct test_result * result); + struct test_result result; +}; + +void 
run_tests(struct test tests[]); + +void test_begin(struct test_result * result); +void test_check(struct test_result * result, int cond); diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 6c1c9d2fb13..234e043b071 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -24,7 +24,6 @@ #define R300_EMIT_H #include "r300_context.h" -#include "radeon_code.h" struct rX00_fragment_program_code; struct r300_vertex_program_code; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index e3a1bc4a0f4..a9fd3ad40dd 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -38,8 +38,7 @@ #include "r300_texture.h" #include "r300_tgsi_to_rc.h" -#include "radeon_code.h" -#include "radeon_compiler.h" +#include "compiler/radeon_compiler.h" /* Convert info about FS input semantics to r300_shader_semantics. */ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index c86a90b85ae..45c9e8801c3 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -27,7 +27,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -#include "radeon_code.h" +#include "compiler/radeon_code.h" #include "r300_shader_semantics.h" struct r300_fragment_shader_code { diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index bb30b1ab0be..5edbb22a743 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2078,7 +2078,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_ALU_OUTC_D2A (3 << 23) # define R300_ALU_OUTC_MIN (4 << 23) # define R300_ALU_OUTC_MAX (5 << 23) -# define R300_ALU_OUTC_CMPH (7 << 23) +# define R300_ALU_OUTC_CND (7 << 23) # define R300_ALU_OUTC_CMP (8 << 23) # define R300_ALU_OUTC_FRC (9 << 23) # define R300_ALU_OUTC_REPL_ALPHA (10 << 23) @@ -2944,6 +2944,23 @@ enum { /*\}*/ +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + /* BEGIN: Packet 3 commands */ /* A primitive emission dword. 
*/ @@ -3249,6 +3266,8 @@ enum { # define R500_INST_RGB_CLAMP (1 << 19) # define R500_INST_ALPHA_CLAMP (1 << 20) # define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALU_RESULT_SEL_RED (0 << 21) +# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21) # define R500_INST_ALPHA_PRED_INV (1 << 22) # define R500_INST_ALU_RESULT_OP_EQ (0 << 23) # define R500_INST_ALU_RESULT_OP_LT (1 << 23) diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 0561ab9bfa4..07a3f3caee7 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -22,8 +22,7 @@ #include "r300_tgsi_to_rc.h" -#include "radeon_compiler.h" -#include "radeon_program.h" +#include "compiler/radeon_compiler.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index b319890157f..a5e8fd680ff 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -32,7 +32,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" -#include "radeon_compiler.h" +#include "compiler/radeon_compiler.h" /* Convert info about VS output semantics into r300_shader_semantics. 
*/ static void r300_shader_read_vs_outputs( diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 170de6c79db..a482ddce9c9 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -26,7 +26,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -#include "radeon_code.h" +#include "compiler/radeon_code.h" #include "r300_context.h" #include "r300_shader_semantics.h" From d4d5e3a336f4c1f2208faad57a985f711b09d86d Mon Sep 17 00:00:00 2001 From: Tobias Droste Date: Thu, 14 Jul 2011 22:32:58 +0200 Subject: [PATCH 098/600] egl/gallium: fix build without softpipe and llvmpipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tobias Droste Acked-by: Jakob Bornecrantz Reviewed-by: Marek Olšák --- src/gallium/targets/egl-static/Makefile | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile index 69e7eecdf0c..5b7b330a1cd 100644 --- a/src/gallium/targets/egl-static/Makefile +++ b/src/gallium/targets/egl-static/Makefile @@ -141,10 +141,18 @@ egl_LIBS += \ $(TOP)/src/gallium/drivers/svga/libsvga.a endif -# swrast +# softpipe +ifneq ($(findstring softpipe,$(GALLIUM_DRIVERS_DIRS)),) egl_CPPFLAGS += -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE egl_LIBS += $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a egl_SYS += -lm +endif + +# llvmpipe +ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),) +egl_CPPFLAGS += -DGALLIUM_LLVMPIPE +egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +endif # sort to remove duplicates egl_CPPFLAGS := $(sort $(egl_CPPFLAGS)) @@ -158,8 +166,6 @@ st_GL_SYS := -lm -lpthread $(DLOPEN_LIBS) # LLVM ifeq ($(MESA_LLVM),1) -egl_CPPFLAGS += -DGALLIUM_LLVMPIPE -egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a egl_SYS += $(LLVM_LIBS) LDFLAGS += $(LLVM_LDFLAGS) From 
79dcfb266aa6ff14ff21c0b6dddef6060b450c32 Mon Sep 17 00:00:00 2001 From: Benjamin Franzke Date: Wed, 27 Jul 2011 10:04:51 +0200 Subject: [PATCH 099/600] wayland-drm: Add copyright notice to protocol Fixes build since wayland 986703ac7365bc87a5501714adb9fc73157c62b7. --- .../wayland-drm/protocol/wayland-drm.xml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml b/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml index 0331f124e80..cde943060ca 100644 --- a/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml +++ b/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml @@ -1,5 +1,32 @@ + + + Copyright © 2008-2011 Kristian Høgsberg + Copyright © 2010-2011 Intel Corporation + + Permission to use, copy, modify, distribute, and sell this + software and its documentation for any purpose is hereby granted + without fee, provided that\n the above copyright notice appear in + all copies and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of + the copyright holders not be used in advertising or publicity + pertaining to distribution of the software without specific, + written prior permission. The copyright holders make no + representations about the suitability of this software for any + purpose. It is provided "as is" without express or implied + warranty. + + THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + THIS SOFTWARE. 
+ + From 58c04435b12a104b1996fac4d3a3d345f31bd4e7 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 27 Jul 2011 12:13:37 +0200 Subject: [PATCH 100/600] mesa: don't forget about sampleBuffers in framebuffer visual update Otherwise multisample will never be enabled for multisample renderbuffers. Reviewed-by: Brian Paul --- src/mesa/main/framebuffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index e27569a6fac..23fa1b2c11e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -548,6 +548,7 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx, fb->Visual.rgbBits = fb->Visual.redBits + fb->Visual.greenBits + fb->Visual.blueBits; fb->Visual.samples = rb->NumSamples; + fb->Visual.sampleBuffers = rb->NumSamples > 0 ? 1 : 0; if (_mesa_get_format_color_encoding(fmt) == GL_SRGB) fb->Visual.sRGBCapable = ctx->Const.sRGBCapable; break; From 5e1b7097f3d6fa60e563c8d629bbda1c34efb3c1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 27 Jul 2011 11:35:31 -0700 Subject: [PATCH 101/600] glsl: Remove completed items from the TODO list --- src/glsl/TODO | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/glsl/TODO b/src/glsl/TODO index a3762384ff2..72133995cea 100644 --- a/src/glsl/TODO +++ b/src/glsl/TODO @@ -11,28 +11,12 @@ 1.30 features: -- Implement AST-to-HIR conversion of bit-shift operators. - -- Implement AST-to-HIR conversion of bit-wise {&,|,^,!} operators. - - Implement AST-to-HIR conversion of switch-statements - switch - case - Update break to correcly handle mixed nexting of switch-statements and loops. -- Handle currently unsupported constant expression types - - ir_unop_bit_not - - ir_binop_mod - - ir_binop_lshift - - ir_binop_rshift - - ir_binop_bit_and - - ir_binop_bit_xor - - ir_binop_bit_or - -- Implement support for 1.30 style shadow compares which only return a float - instead of a vec4. - - Implement support for gl_ClipDistance.
This is non-trivial because gl_ClipDistance is exposed as a float[8], but all hardware actually implements it as vec4[2]. \ No newline at end of file From f622c6d7a23c480f6a17e4b3f81731231180e019 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 27 Jul 2011 11:37:30 -0700 Subject: [PATCH 102/600] glsl: Add source location tracking to TODO list --- src/glsl/TODO | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/glsl/TODO b/src/glsl/TODO index 72133995cea..c99d7e152d6 100644 --- a/src/glsl/TODO +++ b/src/glsl/TODO @@ -9,6 +9,11 @@ - Implement support for ir_binop_dot in ir_algebraic.cpp. Perform transformations such as "dot(v, vec3(0.0, 1.0, 0.0))" -> v.y. +- Track source locations throughout the IR. There are currently several + places where we cannot emit line numbers for errors (and currently emit 0:0) + because we've "lost" the line number information. This is particularly + noticeable at link time. + 1.30 features: - Implement AST-to-HIR conversion of switch-statements From c6f59fcd00101a2f93a5a97d679f3b160ef0126a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 Jul 2011 01:05:13 +0200 Subject: [PATCH 103/600] configure.ac: fix xlib-based softpipe build Tested-by: Jon TURNEY NOTE: This is a candidate for the 7.11 branch. 
--- configure.ac | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 5c832e64669..40924a966c5 100644 --- a/configure.ac +++ b/configure.ac @@ -1936,11 +1936,12 @@ if test "x$with_gallium_drivers" != x; then gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" "" "xvmc-nouveau" ;; xswrast) + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe" + if test "x$MESA_LLVM" = x1; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe" + fi + if test "x$HAVE_ST_DRI" = xyes; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe" - if test "x$MESA_LLVM" = x1; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe" - fi GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS dri-swrast" fi if test "x$HAVE_ST_VDPAU" = xyes; then @@ -1958,9 +1959,6 @@ if test "x$with_gallium_drivers" != x; then if test "x$HAVE_WINSYS_XLIB" != xyes; then GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/xlib" fi - if test "x$HAVE_ST_DRI" != xyes; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe" - fi fi ;; *) From 0aed27ee37860ba332df776425d89d97ca1168b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 Jul 2011 01:05:51 +0200 Subject: [PATCH 104/600] configure.ac: add DLOPEN_LIBS to xlib build Otherwise xlib-based llvmpipe fails to link. NOTE: This is a candidate for the 7.11 branch. 
--- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 40924a966c5..1b1823a211f 100644 --- a/configure.ac +++ b/configure.ac @@ -951,7 +951,7 @@ xyesyes) GL_PC_LIB_PRIV="$GL_LIB_DEPS" GL_PC_CFLAGS="$X11_INCLUDES" fi - GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread" + GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread $DLOPEN_LIBS" GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $SELINUX_LIBS -lm -lpthread" # if static, move the external libraries to the programs From fe33c886a79f49378e5719909a51e794b7bb1c38 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 27 Jul 2011 10:35:17 -0700 Subject: [PATCH 105/600] glsl: improve the accuracy of the radians() builtin function The constant used in the radians() function didn't have enough precision, causing a relative error of 1.676e-5, which is far worse than the precision of 32-bit floats. This patch reduces the relative error to 1.14e-9, which is the best we can do in 32 bits. Fixes piglit tests {fs,vs}-radians-{float,vec2,vec3,vec4}. 
Reviewed-by: Kenneth Graunke --- src/glsl/builtins/ir/radians | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/glsl/builtins/ir/radians b/src/glsl/builtins/ir/radians index 6a0f5d2e219..a419101cf16 100644 --- a/src/glsl/builtins/ir/radians +++ b/src/glsl/builtins/ir/radians @@ -2,20 +2,20 @@ (signature float (parameters (declare (in) float arg0)) - ((return (expression float * (var_ref arg0) (constant float (0.017453)))))) + ((return (expression float * (var_ref arg0) (constant float (0.0174532925)))))) (signature vec2 (parameters (declare (in) vec2 arg0)) - ((return (expression vec2 * (var_ref arg0) (constant float (0.017453)))))) + ((return (expression vec2 * (var_ref arg0) (constant float (0.0174532925)))))) (signature vec3 (parameters (declare (in) vec3 arg0)) - ((return (expression vec3 * (var_ref arg0) (constant float (0.017453)))))) + ((return (expression vec3 * (var_ref arg0) (constant float (0.0174532925)))))) (signature vec4 (parameters (declare (in) vec4 arg0)) - ((return (expression vec4 * (var_ref arg0) (constant float (0.017453)))))) + ((return (expression vec4 * (var_ref arg0) (constant float (0.0174532925)))))) )) From 3e1fd13f605f16e8b48f3a9b71910a3c66eb84b5 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 25 Jul 2011 14:27:07 -0700 Subject: [PATCH 106/600] i965/gen4: Fix message parameter loading for 1D TXD sampling. We were neglecting to load dvdx and dvdy. v is not optional. Fixes glslparsertests tex-grad-0[12345].frag on Broadwater/Crestline. (We still need an execution test using sampler1D.) NOTE: This is a candidate for the 7.11 branch. 
Reviewed-by: Eric Anholt Reviewed-by: Ian Romanick Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 9632aae64b0..b82dfd5ead4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -622,6 +622,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * dPdx = dudx, dvdx, drdx * dPdy = dudy, dvdy, drdy * + * 1-arg: Does not exist. + * * 2-arg: dudx dvdx dudy dvdy * dPdx.x dPdx.y dPdy.x dPdy.y * m4 m5 m6 m7 @@ -633,14 +635,14 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); dPdx.reg_offset++; - mlen++; } + mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); dPdy.reg_offset++; - mlen++; } + mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod * instructions. We'll need to do SIMD16 here. From 15c0bc5eefc89bec537e412c02965f201fb1c011 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 25 Jul 2011 17:06:13 -0700 Subject: [PATCH 107/600] i965: Check actual tile offsets in Gen4 miptree workaround. The purpose of the (irb->draw_offset & 4095) != 0 check was to ensure that we don't have X/Y offsets into a tile, since Gen4 hardware doesn't support that. However, it's insufficient: there are cases where draw_offset & 4095 is 0 but we still have a Y-offset. This leads to an assertion failure in brw_update_renderbuffer_surface with tile_y != 0.
Instead, simply call intel_renderbuffer_tile_offsets to compute the actual X/Y offsets and check if either are non-zero. This makes both the workaround and the assertion check the same things. Fixes piglit test fbo-generatemipmap-formats, and should also fix bugs #34009 and #39487. NOTE: This is a candidate for stable release branches. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34009 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39487 Reviewed-by: Eric Anholt Reviewed-by: Chad Versace Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_fbo.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 35be3257ab3..7d6d9f271e6 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -647,6 +647,22 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, } } +#ifndef I915 +static bool +need_tile_offset_workaround(struct brw_context *brw, + struct intel_renderbuffer *irb) +{ + uint32_t tile_x, tile_y; + + if (brw->has_surface_tile_offset) + return false; + + intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); + + return tile_x != 0 || tile_y != 0; +} +#endif + /** * Called by glFramebufferTexture[123]DEXT() (and other places) to * prepare for rendering into texture memory. 
This might be called @@ -700,8 +716,7 @@ intel_render_texture(struct gl_context * ctx, intel_image->used_as_render_target = GL_TRUE; #ifndef I915 - if (!brw_context(ctx)->has_surface_tile_offset && - (irb->draw_offset & 4095) != 0) { + if (need_tile_offset_workaround(brw_context(ctx), irb)) { /* Original gen4 hardware couldn't draw to a non-tile-aligned * destination in a miptree unless you actually setup your * renderbuffer as a miptree and used the fragile From f73caddd3339d284556036d031ab30ce8057a510 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 25 Jul 2011 21:13:43 -0700 Subject: [PATCH 108/600] i965: Remove the now unused intel_renderbuffer::draw_offset field. The previous commit removed the last use of this field. Reviewed-by: Eric Anholt Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_fbo.c | 1 - src/mesa/drivers/dri/intel/intel_fbo.h | 1 - 2 files changed, 2 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 7d6d9f271e6..e48d6ef9cbd 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -606,7 +606,6 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, zoffset, &dst_x, &dst_y); - irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; irb->draw_x = dst_x; irb->draw_y = dst_y; } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index f7f99a4f00c..2487994fde5 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -58,7 +58,6 @@ struct intel_renderbuffer /** \} */ - GLuint draw_offset; /**< Offset of drawing address within the region */ GLuint draw_x, draw_y; /**< Offset of drawing within the region */ }; From 95ee961f77119826382cfbc617334aed986b72e5 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Fri, 29 Jul 2011 00:33:31 +0400 Subject: [PATCH 109/600] r600g: fix vs export count Fixes 
https://bugs.freedesktop.org/show_bug.cgi?id=39572 Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/evergreen_state.c | 2 +- src/gallium/drivers/r600/r600_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4605c833dea..152c5cf13a0 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2319,7 +2319,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028860_SQ_PGM_RESOURCES_VS, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 01406f2bad6..294c400caa1 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2086,7 +2086,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028868_SQ_PGM_RESOURCES_VS, From 58d6aa82878fc901d4dadd39e308a5d88b064997 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 27 Jul 2011 15:49:39 -0600 Subject: [PATCH 110/600] st/mesa: fix comment language --- src/mesa/state_tracker/st_atom_texture.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 800a9f1f0e0..3115a2511ce 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -221,9 +221,9 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_ if 
((samp->sRGBDecode == GL_SKIP_DECODE_EXT) && (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) { - /* don't do sRGB->RGB conversion. Interpret the texture - * texture data as linear values. - */ + /* Don't do sRGB->RGB conversion. Interpret the texture data as + * linear values. + */ const gl_format linearFormat = _mesa_get_srgb_format_linear(texFormat); firstImageFormat = st_mesa_format_to_pipe_format(linearFormat); From 26684e0b1a857cc16a2c6f2b542e5ccf3da5acf5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 28 Jul 2011 09:43:09 -0600 Subject: [PATCH 111/600] mesa: test against MESA_FORMAT_NONE in _mesa_GetTexLevelParameteriv() --- src/mesa/main/texparam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index c4ec29533e2..3f771f08bc6 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -888,7 +888,7 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, texObj = _mesa_select_tex_object(ctx, texUnit, target); img = _mesa_select_tex_image(ctx, texObj, target, level); - if (!img || !img->TexFormat) { + if (!img || img->TexFormat == MESA_FORMAT_NONE) { /* undefined texture image */ if (pname == GL_TEXTURE_COMPONENTS) *params = 1; From e4fdc95277bd323d8945e20635d3a1702a2e695d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 28 Jul 2011 09:51:30 -0600 Subject: [PATCH 112/600] mesa: fix format selection for meta CopyTexSubImage() When we do a glReadPixels into the temporary buffer, we don't want to use GL_LUMINANCE, GL_LUMINANCE_ALPHA or GL_INTENSITY since they will compute L=R+G+B which is not what we want. This bug has existed all along but was only exposed by the elimination of the driver hook for glCopyTexImage() in 5874890c26f434f54e9218b83fae4eb8175c24e9. 
Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39604 Tested-by: Ian Romanick --- src/mesa/drivers/common/meta.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 26c89519679..f9b4755988b 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -2869,6 +2869,16 @@ copy_tex_sub_image(struct gl_context *ctx, /* Choose format/type for temporary image buffer */ format = _mesa_get_format_base_format(texImage->TexFormat); + if (format == GL_LUMINANCE || + format == GL_LUMINANCE_ALPHA || + format == GL_INTENSITY) { + /* We don't want to use GL_LUMINANCE, GL_INTENSITY, etc. for the + * temp image buffer because glReadPixels will do L=R+G+B which is + * not what we want (should be L=R). + */ + format = GL_RGBA; + } + type = get_temp_image_type(ctx, format); bpp = _mesa_bytes_per_pixel(format, type); if (bpp <= 0) { From f79e3518b4e39cd27f679c402e715154f63107f6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 13 Jul 2011 16:08:42 -0700 Subject: [PATCH 113/600] softpipe: When doing write_all_cbufs, don't stomp over the color. We have to make it through this loop processing the color multiple times, so we can't go overwriting it on our first color buffer. Reviewed-by: Brian Paul --- src/gallium/drivers/softpipe/sp_quad_blend.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 82f9785e32a..c881194768a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -817,17 +817,25 @@ blend_fallback(struct quad_stage *qs, quads[0]->input.y0); boolean has_dst_alpha = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format); - uint q, i, j, qbuf; - - qbuf = write_all ? 
0 : cbuf; + uint q, i, j; for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; float (*quadColor)[4]; + float temp_quad_color[QUAD_SIZE][4]; const int itx = (quad->input.x0 & (TILE_SIZE-1)); const int ity = (quad->input.y0 & (TILE_SIZE-1)); - quadColor = quad->output.color[qbuf]; + if (write_all) { + for (j = 0; j < QUAD_SIZE; j++) { + for (i = 0; i < 4; i++) { + temp_quad_color[i][j] = quad->output.color[0][i][j]; + } + } + quadColor = temp_quad_color; + } else { + quadColor = quad->output.color[cbuf]; + } /* get/swizzle dest colors */ From 83f5d5e6aa58754f52c3579c27d810c497fe13a3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 18:42:21 -0700 Subject: [PATCH 114/600] Add dependency generation for Mesa and GLSL dricore objects. Reviewed-By: Christopher James Halse Rogers --- src/glsl/Makefile | 1 + src/mesa/Makefile | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/glsl/Makefile b/src/glsl/Makefile index 005b51d724b..c20a6c9edd9 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -164,6 +164,7 @@ depend: $(ALL_SOURCES) Makefile rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(ALL_SOURCES) 2> /dev/null + $(MKDEP) $(MKDEP_OPTIONS) -a -p $(DRICORE_OBJ_DIR)/ $(INCLUDES) $(ALL_SOURCES) 2> /dev/null # Remove .o and backup files clean: clean-dricore diff --git a/src/mesa/Makefile b/src/mesa/Makefile index a903a260ac9..88f31b68695 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -12,11 +12,10 @@ DRICORE_OBJ_DIR := objs-dricore include sources.mak # adjust object dirs +DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS)) MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS)) -DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) - # define preprocessor flags MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES) @@ -124,6 +123,8 @@ depend: $(ALL_SOURCES) @ touch depend @$(MKDEP) 
$(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \ $(ALL_SOURCES) > /dev/null 2>/dev/null + @$(MKDEP) $(MKDEP_OPTIONS) -a -p$(DRICORE_OBJ_DIR)/ $(MESA_CPPFLAGS) \ + $(ALL_SOURCES) > /dev/null 2>/dev/null ###################################################################### # Installation rules From a5ab46909e9475da0eb8c814efb8e1859a6e6ed3 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 28 Jul 2011 13:33:55 +0900 Subject: [PATCH 115/600] egl: make pixmaps and pbuffers EGL_BUFFER_PRESERVED eglSwapBuffers is no-op to these surface types anyway. --- src/egl/main/eglsurface.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c index c9cfb01388e..3564ecd01b0 100644 --- a/src/egl/main/eglsurface.c +++ b/src/egl/main/eglsurface.c @@ -269,11 +269,13 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type, { const char *func; EGLint renderBuffer = EGL_BACK_BUFFER; + EGLint swapBehavior = EGL_BUFFER_PRESERVED; EGLint err; switch (type) { case EGL_WINDOW_BIT: func = "eglCreateWindowSurface"; + swapBehavior = EGL_BUFFER_DESTROYED; break; case EGL_PIXMAP_BIT: func = "eglCreatePixmapSurface"; @@ -315,7 +317,7 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type, surf->MipmapLevel = 0; surf->MultisampleResolve = EGL_MULTISAMPLE_RESOLVE_DEFAULT; - surf->SwapBehavior = EGL_BUFFER_DESTROYED; + surf->SwapBehavior = swapBehavior; surf->HorizontalResolution = EGL_UNKNOWN; surf->VerticalResolution = EGL_UNKNOWN; From d6a9564854601bd01a1132f0a17fcab1d2a41481 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 28 Jul 2011 16:03:11 +0900 Subject: [PATCH 116/600] egl: EGL_MATCH_NATIVE_PIXMAP cannot be EGL_DONT_CARE --- src/egl/main/eglconfig.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c index 483d9807cf0..e1d53da3cd5 100644 --- a/src/egl/main/eglconfig.c +++ b/src/egl/main/eglconfig.c @@ -529,8 +529,9
@@ _eglParseConfigAttribList(_EGLConfig *conf, _EGLDisplay *dpy, if (!_eglValidateConfig(conf, EGL_TRUE)) return EGL_FALSE; - /* the spec says that EGL_LEVEL cannot be EGL_DONT_CARE */ - if (conf->Level == EGL_DONT_CARE) + /* EGL_LEVEL and EGL_MATCH_NATIVE_PIXMAP cannot be EGL_DONT_CARE */ + if (conf->Level == EGL_DONT_CARE || + conf->MatchNativePixmap == EGL_DONT_CARE) return EGL_FALSE; /* ignore other attributes when EGL_CONFIG_ID is given */ From 96ca6a6262293ce4ed460edf0aadd0ddb1470e79 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 29 Jul 2011 09:58:18 +0900 Subject: [PATCH 117/600] targets/{egl,gbm}: omit unneeded libdrm_radeon --- src/gallium/targets/egl-static/Makefile | 2 -- src/gallium/targets/gbm/Makefile | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile index 5b7b330a1cd..42d34b8eda1 100644 --- a/src/gallium/targets/egl-static/Makefile +++ b/src/gallium/targets/egl-static/Makefile @@ -121,7 +121,6 @@ egl_CPPFLAGS += -D_EGL_PIPE_R300=1 egl_LIBS += \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a -egl_SYS += -ldrm_radeon endif # r600 @@ -130,7 +129,6 @@ egl_CPPFLAGS += -D_EGL_PIPE_R600=1 egl_LIBS += \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ $(TOP)/src/gallium/drivers/r600/libr600.a -egl_SYS += -ldrm_radeon endif # vmwgfx diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile index 53104253d4f..3ad3eca1d13 100644 --- a/src/gallium/targets/gbm/Makefile +++ b/src/gallium/targets/gbm/Makefile @@ -79,13 +79,11 @@ nouveau_SYS = -ldrm_nouveau r300_LIBS = \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a -r300_SYS = -ldrm_radeon # r600 pipe driver r600_LIBS = \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ $(TOP)/src/gallium/drivers/r600/libr600.a -r600_SYS = -ldrm_radeon # vmwgfx pipe driver vmwgfx_LIBS = \ From 
ef1854d09021b6601e59e39fcb71a88fb5e5efb2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 13 Jul 2011 14:24:41 -0700 Subject: [PATCH 118/600] mesa: Fix ff fragment shader inputs calculation when enabling a VS. The FF VS generation happens just after the FF FS generation in state.c, so the ctx->VP._Current value is for the previous state update's vertex shader, not the one that will be chosen as a result of this state update. The vertexShader and vertexProgram variables should be accurately telling us whether there's going to be a ctx->VP._Current (except on _MaintainTnlProgram drivers, where it's always true). The glsl-vs-statechange-1 test was created to test for this, but it turns out that the bug is hidden by the fact that we call _mesa_update_state() twice per draw call -- once from _mesa_valid_to_render() and once from vbo_draw_arrays(), and the second one was fixing up the first one. Reviewed-by: Brian Paul --- src/mesa/main/ff_fragment_shader.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 0b53c28f7ae..dbfa6b57d4d 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -330,8 +330,7 @@ static GLbitfield get_fp_input_mask( struct gl_context *ctx ) /* _NEW_RENDERMODE */ fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0); } - else if (!(vertexProgram || vertexShader) || - !ctx->VertexProgram._Current) { + else if (!(vertexProgram || vertexShader)) { /* Fixed function vertex logic */ /* _NEW_ARRAY */ GLbitfield varying_inputs = ctx->varying_vp_inputs; From 4fdd289805d14d4f7a234f88cd375be1b3b96764 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 18:50:43 -0700 Subject: [PATCH 119/600] i965/fs: Respect ARB_color_buffer_float clamping. This was done in the old codegen path, but not the new one. Caught by piglit fbo tests after the conversion to GLSL ff_fragment_shader. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 21 ++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index b82dfd5ead4..4f599fb477e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1745,6 +1745,7 @@ void fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) { int reg_width = c->dispatch_width / 8; + fs_inst *inst; if (c->dispatch_width == 8 || intel->gen == 6) { /* SIMD8 write looks like: @@ -1763,8 +1764,10 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * m + 6: a0 * m + 7: a1 */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width), - color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, first_color_mrf + index * reg_width), + color); + inst->saturate = c->key.clamp_fragment_color; } else { /* pre-gen6 SIMD16 single source DP write looks like: * m + 0: r0 @@ -1782,16 +1785,22 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * usual destination + 1 for the second half we get * destination + 4. 
*/ - emit(BRW_OPCODE_MOV, - fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; } else { push_force_uncompressed(); - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_uncompressed(); push_force_sechalf(); color.sechalf = true; - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_sechalf(); color.sechalf = false; } From 5c9e0ad5fddf216921703a0aa9c911a51226cdfd Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 29 Jul 2011 10:59:18 +0900 Subject: [PATCH 120/600] st/egl: create pbuffers with PIPE_BIND_SAMPLER_VIEW So that eglBindTexImage works. 
--- src/gallium/state_trackers/egl/common/egl_g3d_st.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c index 60c3e332ac9..b839f848d7b 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c @@ -126,7 +126,7 @@ pbuffer_reference_openvg_image(struct egl_g3d_surface *gsurf) } static void -pbuffer_allocate_render_texture(struct egl_g3d_surface *gsurf) +pbuffer_allocate_pbuffer_texture(struct egl_g3d_surface *gsurf) { struct egl_g3d_display *gdpy = egl_g3d_display(gsurf->base.Resource.Display); @@ -141,7 +141,8 @@ pbuffer_allocate_render_texture(struct egl_g3d_surface *gsurf) templ.depth0 = 1; templ.array_size = 1; templ.format = gsurf->stvis.color_format; - templ.bind = PIPE_BIND_RENDER_TARGET; + /* for rendering and binding to texture */ + templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; ptex = screen->resource_create(screen, &templ); gsurf->render_texture = ptex; @@ -166,7 +167,7 @@ egl_g3d_st_framebuffer_validate_pbuffer(struct st_framebuffer_iface *stfbi, if (!gsurf->render_texture) { switch (gsurf->client_buffer_type) { case EGL_NONE: - pbuffer_allocate_render_texture(gsurf); + pbuffer_allocate_pbuffer_texture(gsurf); break; case EGL_OPENVG_IMAGE: pbuffer_reference_openvg_image(gsurf); From dc1c0ca22a1c7fcaef90b787290144d8e3d77c33 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 29 Jul 2011 11:29:53 -0400 Subject: [PATCH 121/600] r600g: fix up vs export handling Certain attributes (position, psize, etc.) don't count as params; they are handled separately by the hw. However, the VS is required to export at least one param and r600_shader_from_tgsi() takes care of adding a dummy export if there is none. Make sure the VS param export count in the SPI properly accounts for this. Note: This is a candidate for the 7.11 branch. 
Signed-off-by: Alex Deucher --- src/gallium/drivers/r600/evergreen_state.c | 12 ++++++++++-- src/gallium/drivers/r600/r600_shader.c | 6 ++++++ src/gallium/drivers/r600/r600_shader.h | 1 + src/gallium/drivers/r600/r600_state.c | 12 ++++++++++-- 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 152c5cf13a0..bc6039dd40c 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2298,7 +2298,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10]; - unsigned i, tmp; + unsigned i, tmp, nparams; /* clear previous register */ rstate->nregs = 0; @@ -2317,9 +2317,17 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader spi_vs_out_id[i], 0xFFFFFFFF, NULL); } + /* Certain attributes (position, psize, etc.) don't count as params. + * VS is required to export at least one param and r600_shader_from_tgsi() + * takes care of adding a dummy export. 
+ */ + nparams = rshader->noutput - rshader->npos; + if (nparams < 1) + nparams = 1; + r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1), + S_0286C4_VS_EXPORT_COUNT(nparams - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028860_SQ_PGM_RESOURCES_VS, diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 494f9370597..fc56656f55d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -332,6 +332,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->output[i].sid = d->Semantic.Index; ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; ctx->shader->output[i].interpolate = d->Declaration.Interpolate; + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + /* these don't count as vertex param exports */ + if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) || + (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE)) + ctx->shader->npos++; + } break; case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 3ba84bd8907..600c3e2f540 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -40,6 +40,7 @@ struct r600_shader { struct r600_bc bc; unsigned ninput; unsigned noutput; + unsigned npos; unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 294c400caa1..1350a1cf565 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2062,7 +2062,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10]; - unsigned i, tmp; + unsigned i, tmp, 
nparams; /* clear previous register */ rstate->nregs = 0; @@ -2084,9 +2084,17 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad spi_vs_out_id[i], 0xFFFFFFFF, NULL); } + /* Certain attributes (position, psize, etc.) don't count as params. + * VS is required to export at least one param and r600_shader_from_tgsi() + * takes care of adding a dummy export. + */ + nparams = rshader->noutput - rshader->npos; + if (nparams < 1) + nparams = 1; + r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1), + S_0286C4_VS_EXPORT_COUNT(nparams - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028868_SQ_PGM_RESOURCES_VS, From 44ffb4ae207e48f78fae55925601b8708ed09c1d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 29 Jul 2011 11:52:39 -0700 Subject: [PATCH 122/600] i965/fs: Stop using the exec_list iterator. The old style has gone out of favor in the project, but I kept copy and pasting from existing iterator code. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 70 +++++++++---------- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 4 +- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 16 ++--- .../dri/i965/brw_fs_schedule_instructions.cpp | 16 ++--- .../dri/i965/brw_fs_vector_splitting.cpp | 16 ++--- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 16 ++--- 6 files changed, 67 insertions(+), 71 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b5ea943387d..15475fbae2f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -621,8 +621,8 @@ fs_visitor::assign_curb_setup() } /* Map the offsets in the UNIFORM file to fixed HW regs. 
*/ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == UNIFORM) { @@ -684,8 +684,8 @@ fs_visitor::assign_urb_setup() /* Offset all the urb_setup[] index by the actual position of the * setup regs, now that the location of the constants has been chosen. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == FS_OPCODE_LINTERP) { assert(inst->src[2].file == FIXED_HW_REG); @@ -739,8 +739,8 @@ fs_visitor::split_virtual_grfs() split_grf[this->delta_x.reg] = false; } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Texturing produces 4 contiguous registers, so no splitting. 
*/ if (inst->is_tex()) { @@ -763,8 +763,8 @@ fs_visitor::split_virtual_grfs() } } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && split_grf[inst->dst.reg] && @@ -815,8 +815,8 @@ fs_visitor::setup_pull_constants() int pull_uniform_base = max_uniform_components; int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { if (inst->src[i].file != UNIFORM) @@ -871,8 +871,8 @@ fs_visitor::calculate_live_intervals() } int ip = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == BRW_OPCODE_DO) { if (loop_depth++ == 0) @@ -945,8 +945,8 @@ fs_visitor::propagate_constants() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || @@ -965,11 +965,9 @@ fs_visitor::propagate_constants() /* Found a move of a constant to a GRF. Find anything else using the GRF * before it's written, and replace it with the constant if we can. 
*/ - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->opcode == BRW_OPCODE_DO || scan_inst->opcode == BRW_OPCODE_WHILE || scan_inst->opcode == BRW_OPCODE_ELSE || @@ -1077,8 +1075,8 @@ fs_visitor::dead_code_eliminate() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { inst->remove(); @@ -1101,8 +1099,8 @@ fs_visitor::register_coalesce() int if_depth = 0; int loop_depth = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Make sure that we dominate the instructions we're going to * scan for interfering with our coalescing, or we won't have @@ -1141,11 +1139,10 @@ fs_visitor::register_coalesce() * program. */ bool interfered = false; - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || @@ -1176,10 +1173,9 @@ fs_visitor::register_coalesce() /* Rewrite the later usage to point at the source of the move to * be removed. 
*/ - for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); - scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = inst; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { for (int i = 0; i < 3; i++) { if (scan_inst->src[i].file == GRF && scan_inst->src[i].reg == inst->dst.reg && @@ -1212,8 +1208,8 @@ fs_visitor::compute_to_mrf() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; int ip = next_ip; next_ip++; @@ -1392,8 +1388,8 @@ fs_visitor::remove_duplicate_mrf_writes() memset(last_mrf_move, 0, sizeof(last_mrf_move)); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; switch (inst->opcode) { case BRW_OPCODE_DO: @@ -1527,8 +1523,8 @@ fs_visitor::run() /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). 
*/ - foreach_iter(exec_list_iterator, iter, *shader->ir) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &*shader->ir) { + ir_instruction *ir = (ir_instruction *)node; base_ir = ir; this->result = reg_undef; ir->accept(this); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index eecfc92eb5b..9fb0153d1f8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -609,8 +609,8 @@ fs_visitor::generate_code() prog->Name, c->dispatch_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; struct brw_reg src[3], dst; if (unlikely(INTEL_DEBUG & DEBUG_WM)) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index b4689d2c293..78daa491156 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -75,8 +75,8 @@ fs_visitor::assign_regs_trivial() last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] * reg_width); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); @@ -283,8 +283,8 @@ fs_visitor::assign_regs() reg_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); @@ -336,8 +336,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) * spill/unspill we'll have to do, and guess that the insides of * loops run 
10 times. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { @@ -394,8 +394,8 @@ fs_visitor::spill_reg(int spill_reg) * virtual grf of the same size. For most instructions, though, we * could just spill/unspill the GRF being accessed. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index d8218c26edb..9ec3f502764 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -283,8 +283,8 @@ instruction_scheduler::calculate_deps() memset(last_mrf_write, 0, sizeof(last_mrf_write)); /* top-to-bottom dependencies: RAW and WAW. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; fs_inst *inst = n->inst; /* read-after-write deps. */ @@ -437,8 +437,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) int time = 0; /* Remove non-DAG heads from the list. 
*/ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list_safe(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->parent_count != 0) n->remove(); } @@ -447,8 +447,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) schedule_node *chosen = NULL; int chosen_time = 0; - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (!chosen || n->unblocked_time < chosen_time) { chosen = n; @@ -490,8 +490,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) * progress until the first is done. */ if (chosen->inst->is_math()) { - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->inst->is_math()) n->unblocked_time = MAX2(n->unblocked_time, diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 530ffa26580..a9a60c2fd8a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -122,8 +122,8 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var) break; } - foreach_iter(exec_list_iterator, iter, this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) return entry; } @@ -222,8 +222,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var) if (!var->type->is_vector()) return NULL; - foreach_iter(exec_list_iterator, iter, *this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &*this->variable_list) { + variable_entry 
*entry = (variable_entry *)node; if (entry->var == var) { return entry; } @@ -341,8 +341,8 @@ brw_do_vector_splitting(exec_list *instructions) visit_list_elements(&refs, instructions); /* Trim out variables we can't split. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list_safe(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; if (debug) { printf("vector %s@%p: decl %d, whole_access %d\n", @@ -363,8 +363,8 @@ brw_do_vector_splitting(exec_list *instructions) /* Replace the decls of the vectors to be split with their split * components. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; const struct glsl_type *type; type = glsl_type::get_instance(entry->var->type->base_type, 1, 1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 4f599fb477e..2b769ccbba1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1477,8 +1477,8 @@ fs_visitor::visit(ir_if *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->then_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->then_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1487,8 +1487,8 @@ fs_visitor::visit(ir_if *ir) if (!ir->else_instructions.is_empty()) { emit(BRW_OPCODE_ELSE); - foreach_iter(exec_list_iterator, iter, ir->else_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->else_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1538,8 +1538,8 @@ 
fs_visitor::visit(ir_loop *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->body_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->body_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; @@ -1595,8 +1595,8 @@ fs_visitor::visit(ir_function *ir) assert(sig); - foreach_iter(exec_list_iterator, iter, sig->body) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &sig->body) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); From 652ef8569c923cf8e1e254dddc160c7995d258aa Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 15:48:53 -0700 Subject: [PATCH 123/600] Revert "i965: Don't compute brw->wm.input_size_masks when it's unused." This reverts commit 3412069e23b7fa5656262f3dd1aa86f66980594d. We're about to start using it in fragment shaders to handle avoiding projection for fixed function. --- src/mesa/drivers/dri/i965/brw_vs_constval.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 9fdfebe9f76..47cc0a7da7a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -194,19 +194,11 @@ static void calc_wm_input_sizes( struct brw_context *brw ) /* BRW_NEW_VERTEX_PROGRAM */ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; GLuint i; - /* If we're going to go through brw_fs.cpp, we don't end up using - * brw->wm.input_size_masks. 
- */ - if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) - return; - memset(&t, 0, sizeof(t)); /* _NEW_LIGHT */ @@ -246,9 +238,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) const struct brw_tracked_state brw_wm_input_sizes = { .dirty = { .mesa = _NEW_LIGHT, - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_INPUT_DIMENSIONS), + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, .cache = 0 }, .prepare = calc_wm_input_sizes From eb30820f268608cf451da32de69723036dddbc62 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 15:56:46 -0700 Subject: [PATCH 124/600] i965/fs: Port texture projection avoidance optimization from the old backend. This is part of fixing a ~1% performance regression in OpenArena when changing the fixed function fragment shader to using the new backend. Right now this just avoids the LINTERP of the projector, not the math using it. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 15475fbae2f..9c3180fbc1c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -463,9 +463,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) } else { /* Perspective interpolation case. */ for (unsigned int k = 0; k < type->vector_elements; k++) { - struct brw_reg interp = interp_reg(location, k); - emit(FS_OPCODE_LINTERP, attr, - this->delta_x, this->delta_y, fs_reg(interp)); + /* FINISHME: At some point we probably want to push + * this farther by giving similar treatment to the + * other potentially constant components of the + * attribute, as well as making brw_vs_constval.c + * handle varyings other than gl_TexCoord. 
+ */ + if (location >= FRAG_ATTRIB_TEX0 && + location <= FRAG_ATTRIB_TEX7 && + k == 3 && !(c->key.proj_attrib_mask & (1 << location))) { + emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f)); + } else { + struct brw_reg interp = interp_reg(location, k); + emit(FS_OPCODE_LINTERP, attr, + this->delta_x, this->delta_y, fs_reg(interp)); + } attr.reg_offset++; } From 6d8d6b41b85a18685351f3023a4cd41266ba9e68 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 16:18:39 -0700 Subject: [PATCH 125/600] i965/fs: If we see a RCP of a constant, try to constant fold it. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9c3180fbc1c..351d1dd283e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1056,6 +1056,20 @@ fs_visitor::propagate_constants() progress = true; } break; + + case FS_OPCODE_RCP: + /* The hardware doesn't do math on immediate values + * (because why are you doing that, seriously?), but + * the correct answer is to just constant fold it + * anyway. + */ + assert(i == 0); + if (inst->src[0].imm.f != 0.0f) { + scan_inst->opcode = BRW_OPCODE_MOV; + scan_inst->src[0] = inst->src[0]; + progress = true; + } + break; } } From a8b86459a1bb74cfdf0d63572a9fe194b2b5b53f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 16:45:15 -0700 Subject: [PATCH 126/600] i965/fs: Optimize a * 1.0 -> a. This appears in our instruction stream as a result of the brw_vs_constval.c handling. 
--- src/mesa/drivers/dri/i965/brw_fs.cpp | 43 ++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 1 + 2 files changed, 44 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 351d1dd283e..a9617c56e12 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1067,6 +1067,7 @@ fs_visitor::propagate_constants() if (inst->src[0].imm.f != 0.0f) { scan_inst->opcode = BRW_OPCODE_MOV; scan_inst->src[0] = inst->src[0]; + scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f; progress = true; } break; @@ -1087,6 +1088,47 @@ fs_visitor::propagate_constants() return progress; } + + +/** + * Performs simple algebraic simplifications on the FS + * instruction stream. + * + * Currently the only rewrite is a * 1.0 -> a: a MUL by an immediate + * float 1.0 becomes a MOV of the first source. + */ + +bool +fs_visitor::opt_algebraic() +{ + bool progress = false; + + calculate_live_intervals(); + + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + switch (inst->opcode) { + case BRW_OPCODE_MUL: + if (inst->src[1].file != IMM) + continue; + + /* a * 1.0 = a */ + if (inst->src[1].type == BRW_REGISTER_TYPE_F && + inst->src[1].imm.f == 1.0) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } + + break; + } + } + + return progress; +} + /** * Must be called after calculate_live_intervales() to remove unused * writes to registers -- register allocation will fail otherwise @@ -1572,6 +1614,7 @@ fs_visitor::run() progress = remove_duplicate_mrf_writes() || progress; progress = propagate_constants() || progress; + progress = opt_algebraic() || progress; progress = register_coalesce() || progress; progress = compute_to_mrf() || progress; progress = dead_code_eliminate() || progress; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index
2bf850e5dea..89d6cda7e4f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -485,6 +485,7 @@ public: void setup_pull_constants(); void calculate_live_intervals(); bool propagate_constants(); + bool opt_algebraic(); bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); From f710b8c7501f29f5f8941e757ea1066cbeb03305 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 16:52:54 -0700 Subject: [PATCH 127/600] i965/fs: Allow register coalescing where the source is a uniform. Removes 0.8% of the fragment shader instructions on Unigine Tropics. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a9617c56e12..e07798cebc1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1196,7 +1196,8 @@ fs_visitor::register_coalesce() if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->saturate || - inst->dst.file != GRF || inst->src[0].file != GRF || + inst->dst.file != GRF || (inst->src[0].file != GRF && + inst->src[0].file != UNIFORM)|| inst->dst.type != inst->src[0].type) continue; @@ -1218,7 +1219,8 @@ fs_visitor::register_coalesce() interfered = true; break; } - if (scan_inst->dst.reg == inst->src[0].reg && + if (inst->src[0].file == GRF && + scan_inst->dst.reg == inst->src[0].reg && (scan_inst->dst.reg_offset == inst->src[0].reg_offset || scan_inst->is_tex())) { interfered = true; @@ -1226,10 +1228,13 @@ fs_visitor::register_coalesce() } } - /* The gen6 MATH instruction can't handle source modifiers, so avoid - * coalescing those for now. We should do something more specific. + /* The gen6 MATH instruction can't handle source modifiers or + * unusual register regions, so avoid coalescing those for + * now. We should do something more specific. 
*/ - if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) { + if (intel->gen >= 6 && + scan_inst->is_math() && + (has_source_modifiers || inst->src[0].file == UNIFORM)) { interfered = true; break; } @@ -1248,11 +1253,10 @@ fs_visitor::register_coalesce() if (scan_inst->src[i].file == GRF && scan_inst->src[i].reg == inst->dst.reg && scan_inst->src[i].reg_offset == inst->dst.reg_offset) { - scan_inst->src[i].reg = inst->src[0].reg; - scan_inst->src[i].reg_offset = inst->src[0].reg_offset; - scan_inst->src[i].abs |= inst->src[0].abs; - scan_inst->src[i].negate ^= inst->src[0].negate; - scan_inst->src[i].smear = inst->src[0].smear; + fs_reg new_src = inst->src[0]; + new_src.negate ^= scan_inst->src[i].negate; + new_src.abs |= scan_inst->src[i].abs; + scan_inst->src[i] = new_src; } } } From dc1f32deae1ab7366792fe5c7d654e02757985c0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 Jul 2011 16:49:55 -0600 Subject: [PATCH 128/600] mesa: add missing breaks for GL_TEXTURE_CUBE_MAP_SEAMLESS queries And fix indentation. NOTE: This is a candidate for the 7.11 branch. 
--- src/mesa/main/texparam.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 3f771f08bc6..134f15346e8 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1266,12 +1266,13 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { - *params = (GLfloat) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { + *params = (GLfloat) obj->Sampler.CubeMapSeamless; + } + else { + error = GL_TRUE; + } + break; default: error = GL_TRUE; @@ -1441,6 +1442,7 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) else { error = GL_TRUE; } + break; default: ; /* silence warnings */ From 120d71a45cfda1edfa8cd6b1732e209eb98b53d8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 Jul 2011 16:49:55 -0600 Subject: [PATCH 129/600] mesa: minor comment changes in teximage.c --- src/mesa/main/teximage.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 302fd65cb27..27717cfb0f5 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1,6 +1,5 @@ /* - * mesa 3-D graphics library - * Version: 7.6 + * Mesa 3-D graphics library * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * Copyright (C) 2009 VMware, Inc. All Rights Reserved. @@ -556,8 +555,6 @@ _mesa_tex_target_to_face(GLenum target) * \param target texture target. * \param level image level. * \param texImage texture image. - * - * This was basically prompted by the introduction of cube maps. */ void _mesa_set_tex_image(struct gl_texture_object *tObj, @@ -709,15 +706,13 @@ get_proxy_target(GLenum target) /** * Get the texture object that corresponds to the target of the given - * texture unit. + * texture unit. 
The target should have already been checked for validity. * * \param ctx GL context. * \param texUnit texture unit. * \param target texture target. * * \return pointer to the texture object on success, or NULL on failure. - * - * \sa gl_texture_unit. */ struct gl_texture_object * _mesa_select_tex_object(struct gl_context *ctx, From 200e4972c1579e8dfaa6f11eee2a7e54baad4852 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jul 2011 12:21:27 -0700 Subject: [PATCH 130/600] glsl: Add method glsl_type::can_implicitly_convert_to() This method checks if a source type is identical to or can be implicitly converted to a target type according to the GLSL 1.20 spec, Section 4.1.10 Implicit Conversions. The following commits use the method for a bugfix: glsl: Fix implicit conversions in non-constructor function calls glsl: Fix implicit conversions in array constructors Note: This is a candidate for the 7.10 and 7.11 branches. Reviewed-by: Kenneth Graunke Signed-off-by: Chad Versace --- src/glsl/glsl_types.cpp | 16 ++++++++++++++++ src/glsl/glsl_types.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index a5e21bbb96c..c94aec0d2da 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -523,3 +523,19 @@ glsl_type::component_slots() const return 0; } } + +bool +glsl_type::can_implicitly_convert_to(const glsl_type *desired) const +{ + if (this == desired) + return true; + + /* There is no conversion among matrix types. */ + if (this->matrix_columns > 1 || desired->matrix_columns > 1) + return false; + + /* int and uint can be converted to float. 
*/ + return desired->is_float() + && this->is_integer() + && this->vector_elements == desired->vector_elements; +} diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 87f57e7c756..048696693be 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -224,6 +224,41 @@ struct glsl_type { */ unsigned component_slots() const; + /** + * \brief Can this type be implicitly converted to another? + * + * \return True if the types are identical or if this type can be converted + * to \c desired according to Section 4.1.10 of the GLSL spec. + * + * \verbatim + * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10 + * Implicit Conversions: + * + * In some situations, an expression and its type will be implicitly + * converted to a different type. The following table shows all allowed + * implicit conversions: + * + * Type of expression | Can be implicitly converted to + * -------------------------------------------------- + * int float + * uint + * + * ivec2 vec2 + * uvec2 + * + * ivec3 vec3 + * uvec3 + * + * ivec4 vec4 + * uvec4 + * + * There are no implicit array or structure conversions. For example, + * an array of int cannot be implicitly converted to an array of float. + * There are no implicit conversions between signed and unsigned + * integers. + * \endverbatim + */ + bool can_implicitly_convert_to(const glsl_type *desired) const; /** * Query whether or not a type is a scalar (non-vector and non-matrix). From 8b3627fd7b52723102f070957d87f98073e92d7c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jul 2011 12:31:10 -0700 Subject: [PATCH 131/600] glsl: Fix implicit conversions in non-constructor function calls Context ------- In ast_function_expression::hir(), parameter_lists_match() checks if the function call's actual parameter list matches the signature's parameter list, where the match may require implicit conversion of some arguments. 
To check if an implicit conversion exists between individual arguments, type_compare() is used. Problems -------- type_compare() allowed the following illegal implicit conversions: bool -> float bvecN -> vecN int -> uint ivecN -> uvecN uint -> int uvecN -> ivecN Change ------ type_compare() is buggy, so replace it with glsl_type::can_implicitly_convert_to(). This comprises a rewrite of parameter_lists_match(). Fixes piglit:spec/glsl-1.20/compiler/built-in-functions/outerProduct-bvec*.vert Note: This is a candidate for the 7.10 and 7.11 branches. Reviewed-by: Kenneth Graunke Signed-off-by: Chad Versace --- src/glsl/ir_function.cpp | 46 +++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index 0f2f1a0eea4..eca0079c166 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -85,12 +85,25 @@ type_compare(const glsl_type *a, const glsl_type *b) } +/** + * \brief Check if two parameter lists match. + * + * \param list_a Parameters of the function definition. + * \param list_b Actual parameters passed to the function. + * \return If an exact match, return 0. + * If an inexact match requiring implicit conversion, return 1. + * If not a match, return -1. + * \see matching_signature() + */ static int parameter_lists_match(const exec_list *list_a, const exec_list *list_b) { const exec_node *node_a = list_a->head; const exec_node *node_b = list_b->head; - int total_score = 0; + + /* This is set to true if there is an inexact match requiring an implicit + * conversion. */ + bool inexact_match = false; for (/* empty */ ; !node_a->is_tail_sentinel() @@ -106,12 +119,11 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) const ir_variable *const param = (ir_variable *) node_a; const ir_instruction *const actual = (ir_instruction *) node_b; - /* Determine whether or not the types match. 
If the types are an - * exact match, the match score is zero. If the types don't match - * but the actual parameter can be coerced to the type of the declared - * parameter, the match score is one. - */ - int score; + if (param->type == actual->type) + continue; + + /* Try to find an implicit conversion from actual to param. */ + inexact_match = true; switch ((enum ir_variable_mode)(param->mode)) { case ir_var_auto: case ir_var_uniform: @@ -125,11 +137,13 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) case ir_var_const_in: case ir_var_in: - score = type_compare(param->type, actual->type); + if (!actual->type->can_implicitly_convert_to(param->type)) + return -1; break; case ir_var_out: - score = type_compare(actual->type, param->type); + if (!param->type->can_implicitly_convert_to(actual->type)) + return -1; break; case ir_var_inout: @@ -137,17 +151,12 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) * there is int -> float but no float -> int), inout parameters must * be exact matches. */ - score = (type_compare(actual->type, param->type) == 0) ? 0 : -1; - break; + return -1; default: assert(false); - } - - if (score < 0) return -1; - - total_score += score; + } } /* If all of the parameters from the other parameter list have been @@ -157,7 +166,10 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) if (!node_b->is_tail_sentinel()) return -1; - return total_score; + if (inexact_match) + return 1; + else + return 0; } From 6efe1a849586e46028c1eb763175904166ec7076 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jul 2011 12:32:10 -0700 Subject: [PATCH 132/600] glsl: Remove ir_function.cpp:type_compare() The function is no longer used and has been replaced by glsl_type::can_implicitly_convert_to(). Note: This is a candidate for the 7.10 and 7.11 branches. 
Reviewed-by: Kenneth Graunke Signed-off-by: Chad Versace --- src/glsl/ir_function.cpp | 61 ---------------------------------------- 1 file changed, 61 deletions(-) diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index eca0079c166..dd63e3078f8 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -24,67 +24,6 @@ #include "glsl_types.h" #include "ir.h" -int -type_compare(const glsl_type *a, const glsl_type *b) -{ - /* If the types are the same, they trivially match. - */ - if (a == b) - return 0; - - switch (a->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: - /* There is no implicit conversion to or from integer types or bool. - */ - if ((a->is_integer() != b->is_integer()) - || (a->is_boolean() != b->is_boolean())) - return -1; - - /* FALLTHROUGH */ - - case GLSL_TYPE_FLOAT: - if ((a->vector_elements != b->vector_elements) - || (a->matrix_columns != b->matrix_columns)) - return -1; - - return 1; - - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_STRUCT: - /* Samplers and structures must match exactly. - */ - return -1; - - case GLSL_TYPE_ARRAY: - if ((b->base_type != GLSL_TYPE_ARRAY) - || (a->length != b->length)) - return -1; - - /* From GLSL 1.50 spec, page 27 (page 33 of the PDF): - * "There are no implicit array or structure conversions." - * - * If the comparison of the array element types detects that a conversion - * would be required, the array types do not match. - */ - return (type_compare(a->fields.array, b->fields.array) == 0) ? 0 : -1; - - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - default: - /* These are all error conditions. It is invalid for a parameter to - * a function to be declared as error, void, or a function. - */ - return -1; - } - - /* This point should be unreachable. - */ - assert(0); -} - - /** * \brief Check if two parameter lists match. 
* From a5ab9398e34287ed8cbb010d0758790e6692530c Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jul 2011 13:00:02 -0700 Subject: [PATCH 133/600] glsl: Fix conversions in array constructors Array constructors obey narrower conversion rules than other constructors [1] --- they use the implicit conversion rules [2] instead of the scalar constructor conversions [3]. But process_array_constructor() was incorrectly applying the broader rules. [1] GLSL 1.50 spec, Section 5.4.4 Array Constructors, page 52 (58 of pdf) [2] GLSL 1.50 spec, Section 4.1.10 Implicit Conversions, page 25 (31 of pdf) [3] GLSL 1.50 spec, Section 5.4.1 Conversion, page 48 (54 of pdf) To fix this, first check (with glsl_type::can_implicitly_convert_to) if an implicit conversion is legal before performing the conversion. Fixes: piglit:spec/glsl-1.20/compiler/structure-and-array-operations/array-ctor-implicit-conversion-bool-float.vert piglit:spec/glsl-1.20/compiler/structure-and-array-operations/array-ctor-implicit-conversion-bvec*-vec*.vert Note: This is a candidate for the 7.10 and 7.11 branches. Reviewed-by: Kenneth Graunke Signed-off-by: Chad Versace --- src/glsl/ast_function.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index bdb73f48706..8bcf48dfd91 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -442,13 +442,21 @@ process_array_constructor(exec_list *instructions, ir_rvalue *ir = (ir_rvalue *) n; ir_rvalue *result = ir; - /* Apply implicit conversions (not the scalar constructor rules!) */ + /* Apply implicit conversions (not the scalar constructor rules!). See + * the spec quote above. 
*/ if (constructor_type->element_type()->is_float()) { const glsl_type *desired_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, ir->type->vector_elements, ir->type->matrix_columns); - result = convert_component(ir, desired_type); + if (result->type->can_implicitly_convert_to(desired_type)) { + /* Even though convert_component() implements the constructor + * conversion rules (not the implicit conversion rules), its safe + * to use it here because we already checked that the implicit + * conversion is legal. + */ + result = convert_component(ir, desired_type); + } } if (result->type != constructor_type->element_type()) { From 5081d31a0ed753e7e23c5ed51f572d38aef66bfe Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 27 Jul 2011 12:37:51 -0700 Subject: [PATCH 134/600] glsl: Clarify ir_function::matching_signature() The function used a variable named 'score', which was an outright lie. A signature matches or it doesn't; there is no fuzzy scoring. Change the return type of parameter_lists_match() to an enum, and let ir_function::matching_signature() switch on that enum. Reviewed-by: Kenneth Graunke Signed-off-by: Chad Versace --- src/glsl/ir_function.cpp | 53 +++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index dd63e3078f8..6cfc32cc2a0 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -24,17 +24,28 @@ #include "glsl_types.h" #include "ir.h" +typedef enum { + PARAMETER_LIST_NO_MATCH, + PARAMETER_LIST_EXACT_MATCH, + PARAMETER_LIST_INEXACT_MATCH, /*< Match requires implicit conversion. */ +} parameter_list_match_t; + /** * \brief Check if two parameter lists match. * * \param list_a Parameters of the function definition. * \param list_b Actual parameters passed to the function. - * \return If an exact match, return 0. - * If an inexact match requiring implicit conversion, return 1. - * If not a match, return -1. 
* \see matching_signature() */ -static int + +/** + * \brief Check if two parameter lists match. + * + * \param list_a Parameters of the function definition. + * \param list_b Actual parameters passed to the function. + * \see matching_signature() + */ +static parameter_list_match_t parameter_lists_match(const exec_list *list_a, const exec_list *list_b) { const exec_node *node_a = list_a->head; @@ -52,7 +63,7 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) * do not match. */ if (node_b->is_tail_sentinel()) - return -1; + return PARAMETER_LIST_NO_MATCH; const ir_variable *const param = (ir_variable *) node_a; @@ -72,17 +83,17 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) * as uniform. */ assert(0); - return -1; + return PARAMETER_LIST_NO_MATCH; case ir_var_const_in: case ir_var_in: if (!actual->type->can_implicitly_convert_to(param->type)) - return -1; + return PARAMETER_LIST_NO_MATCH; break; case ir_var_out: if (!param->type->can_implicitly_convert_to(actual->type)) - return -1; + return PARAMETER_LIST_NO_MATCH; break; case ir_var_inout: @@ -90,11 +101,11 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) * there is int -> float but no float -> int), inout parameters must * be exact matches. */ - return -1; + return PARAMETER_LIST_NO_MATCH; default: assert(false); - return -1; + return PARAMETER_LIST_NO_MATCH; } } @@ -103,12 +114,12 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) * match. 
*/ if (!node_b->is_tail_sentinel()) - return -1; + return PARAMETER_LIST_NO_MATCH; if (inexact_match) - return 1; + return PARAMETER_LIST_INEXACT_MATCH; else - return 0; + return PARAMETER_LIST_EXACT_MATCH; } @@ -132,18 +143,20 @@ ir_function::matching_signature(const exec_list *actual_parameters) ir_function_signature *const sig = (ir_function_signature *) iter.get(); - const int score = parameter_lists_match(& sig->parameters, - actual_parameters); - - /* If we found an exact match, simply return it */ - if (score == 0) + switch (parameter_lists_match(& sig->parameters, actual_parameters)) { + case PARAMETER_LIST_EXACT_MATCH: return sig; - - if (score > 0) { + case PARAMETER_LIST_INEXACT_MATCH: if (match == NULL) match = sig; else multiple_inexact_matches = true; + continue; + case PARAMETER_LIST_NO_MATCH: + continue; + default: + assert(false); + return NULL; } } From e737a99a6fbafe3ba4b5175eea25d1598dbeb9d8 Mon Sep 17 00:00:00 2001 From: Jeremy Huddleston Date: Sun, 31 Jul 2011 09:21:56 -0700 Subject: [PATCH 135/600] Fix PPC detection on darwin Fixes regression introduced by 7004582c1894ede839c44e292b413fe4916d7e9e Signed-off-by: Jeremy Huddleston --- src/gallium/include/pipe/p_config.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index eea3d79e64b..803b806592c 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -99,9 +99,9 @@ #endif #endif -#if defined(__PPC__) +#if defined(__ppc__) || defined(__ppc64__) || defined(__PPC__) #define PIPE_ARCH_PPC -#if defined(__PPC64__) +#if defined(__ppc64__) || defined(__PPC64__) #define PIPE_ARCH_PPC_64 #endif #endif From 5b3c7199830b8eaac4df2f8c3f10d0e89b4bd5c5 Mon Sep 17 00:00:00 2001 From: Jeremy Huddleston Date: Sun, 31 Jul 2011 09:31:48 -0700 Subject: [PATCH 136/600] darwin: Use machine/endian.h to determine endianness Signed-off-by: Jeremy Huddleston --- 
src/gallium/include/pipe/p_config.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index 803b806592c..8a5d892c884 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -120,6 +120,15 @@ # define PIPE_ARCH_BIG_ENDIAN #endif +#elif defined(__APPLE__) +#include + +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN +# define PIPE_ARCH_LITTLE_ENDIAN +#elif __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN +# define PIPE_ARCH_BIG_ENDIAN +#endif + #else #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) From 5541920e0ac4ea8383c7f896daba24a304aafec6 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 1 Aug 2011 09:36:08 -0700 Subject: [PATCH 137/600] glsl: Remove duplicate comment Remove duplicate doxygen comment for ir_function.cpp:parameter_lists_match(). Signed-off-by: Chad Versace --- src/glsl/ir_function.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index 6cfc32cc2a0..2a4de5b0dcd 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -30,14 +30,6 @@ typedef enum { PARAMETER_LIST_INEXACT_MATCH, /*< Match requires implicit conversion. */ } parameter_list_match_t; -/** - * \brief Check if two parameter lists match. - * - * \param list_a Parameters of the function definition. - * \param list_b Actual parameters passed to the function. - * \see matching_signature() - */ - /** * \brief Check if two parameter lists match. * From d4c80f5f85c749df3fc091ff07b60ef4989fa6d9 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 27 Jul 2011 14:34:12 -0700 Subject: [PATCH 138/600] glsl: improve the accuracy of the asin() builtin function. The previous formula for asin(x) was algebraically equivalent to: sign(x)*(pi/2 - sqrt(1-|x|)*(A + B|x| + C|x|^2)) where A, B, and C were arbitrary constants determined by a curve fit. 
This formula had a worst case absolute error of 0.00448, an unbounded worst case relative error, and a discontinuity near x=0. Changed the formula to: sign(x)*(pi/2 - sqrt(1-|x|)*(pi/2 + (pi/4-1)|x| + A|x|^2 + B|x|^3)) where A and B are arbitrary constants determined by a curve fit. This has a worst case absolute error of 0.00039, a worst case relative error of 0.000405, and no discontinuities. I don't expect a significant performance degradation, since the extra multiply-accumulate should be fast compared to the sqrt() computation. Fixes piglit tests {vs,fs}-asin-float and {vs,fs}-atan-* --- src/glsl/builtins/ir/asin | 68 +++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/src/glsl/builtins/ir/asin b/src/glsl/builtins/ir/asin index e230ad614ee..45d9e672958 100644 --- a/src/glsl/builtins/ir/asin +++ b/src/glsl/builtins/ir/asin @@ -5,23 +5,26 @@ ((return (expression float * (expression float sign (var_ref x)) (expression float - - (expression float * - (constant float (3.1415926)) - (constant float (0.5))) + (constant float (1.5707964)) (expression float * (expression float sqrt (expression float - (constant float (1.0)) (expression float abs (var_ref x)))) (expression float + - (constant float (1.5707288)) + (constant float (1.5707964)) (expression float * (expression float abs (var_ref x)) (expression float + - (constant float (-0.2121144)) + (constant float (-0.21460183)) (expression float * - (constant float (0.0742610)) - (expression float abs (var_ref x)))))))))))) + (expression float abs (var_ref x)) + (expression float + + (constant float (0.086566724)) + (expression float * + (expression float abs (var_ref x)) + (constant float (-0.03102955)) + )))))))))))) (signature vec2 (parameters @@ -29,23 +32,26 @@ ((return (expression vec2 * (expression vec2 sign (var_ref x)) (expression vec2 - - (expression float * - (constant float (3.1415926)) - (constant float (0.5))) + (constant float (1.5707964)) (expression 
vec2 * (expression vec2 sqrt (expression vec2 - (constant float (1.0)) (expression vec2 abs (var_ref x)))) (expression vec2 + - (constant float (1.5707288)) + (constant float (1.5707964)) (expression vec2 * (expression vec2 abs (var_ref x)) (expression vec2 + - (constant float (-0.2121144)) + (constant float (-0.21460183)) (expression vec2 * - (constant float (0.0742610)) - (expression vec2 abs (var_ref x)))))))))))) + (expression vec2 abs (var_ref x)) + (expression vec2 + + (constant float (0.086566724)) + (expression vec2 * + (expression vec2 abs (var_ref x)) + (constant float (-0.03102955)) + )))))))))))) (signature vec3 (parameters @@ -53,23 +59,26 @@ ((return (expression vec3 * (expression vec3 sign (var_ref x)) (expression vec3 - - (expression float * - (constant float (3.1415926)) - (constant float (0.5))) + (constant float (1.5707964)) (expression vec3 * (expression vec3 sqrt (expression vec3 - (constant float (1.0)) (expression vec3 abs (var_ref x)))) (expression vec3 + - (constant float (1.5707288)) + (constant float (1.5707964)) (expression vec3 * (expression vec3 abs (var_ref x)) (expression vec3 + - (constant float (-0.2121144)) + (constant float (-0.21460183)) (expression vec3 * - (constant float (0.0742610)) - (expression vec3 abs (var_ref x)))))))))))) + (expression vec3 abs (var_ref x)) + (expression vec3 + + (constant float (0.086566724)) + (expression vec3 * + (expression vec3 abs (var_ref x)) + (constant float (-0.03102955)) + )))))))))))) (signature vec4 (parameters @@ -77,21 +86,24 @@ ((return (expression vec4 * (expression vec4 sign (var_ref x)) (expression vec4 - - (expression float * - (constant float (3.1415926)) - (constant float (0.5))) + (constant float (1.5707964)) (expression vec4 * (expression vec4 sqrt (expression vec4 - (constant float (1.0)) (expression vec4 abs (var_ref x)))) (expression vec4 + - (constant float (1.5707288)) + (constant float (1.5707964)) (expression vec4 * (expression vec4 abs (var_ref x)) (expression vec4 + - 
(constant float (-0.2121144)) + (constant float (-0.21460183)) (expression vec4 * - (constant float (0.0742610)) - (expression vec4 abs (var_ref x)))))))))))) + (expression vec4 abs (var_ref x)) + (expression vec4 + + (constant float (0.086566724)) + (expression vec4 * + (expression vec4 abs (var_ref x)) + (constant float (-0.03102955)) + )))))))))))) )) From b1b4ea0b3679db0b8fddaa9663a10d4712bba3b7 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 27 Jul 2011 15:53:31 -0700 Subject: [PATCH 139/600] glsl: improve the accuracy of the atan(x,y) builtin function. The previous formula for atan(x,y) returned a value of +/- pi whenever |x|<0.0001, and used a formula based on atan(y/x) otherwise. This broke in cases where both x and y were small (e.g. atan(1e-5, 1e-5)). This patch modifies the formula so that it returns a value of +/- pi whenever |x|<1e-8*|y|, and uses the formula based on atan(y/x) otherwise. --- src/glsl/builtins/ir/atan | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/glsl/builtins/ir/atan b/src/glsl/builtins/ir/atan index cfecc1f1749..7b5ea13c6ba 100644 --- a/src/glsl/builtins/ir/atan +++ b/src/glsl/builtins/ir/atan @@ -54,7 +54,9 @@ ) ( (declare () float r) - (if (expression bool > (expression float abs (var_ref x)) (constant float (0.000100))) ( + (if (expression bool > + (expression float abs (var_ref x)) + (expression float * (constant float (1.0e-8)) (expression float abs (var_ref y)))) ( (assign (x) (var_ref r) (call atan ((expression float / (var_ref y) (var_ref x))))) (if (expression bool < (var_ref x) (constant float (0.000000)) ) ( (if (expression bool >= (var_ref y) (constant float (0.000000)) ) From f379d8f73063a4c4d6cf379318c6b37118d46bfa Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 25 Apr 2011 23:37:47 -0500 Subject: [PATCH 140/600] st/mesa: Add a GLSL IR to TGSI translator. It is still a work in progress at this point, but it produces working and reasonably well-optimized code. 
Originally based on ir_to_mesa and st_mesa_to_tgsi, but does not directly use Mesa IR instructions in TGSI generation, instead generating TGSI from the intermediate class glsl_to_tgsi_instruction. It also has new optimization passes to replace _mesa_optimize_program. --- src/mesa/sources.mak | 3 +- src/mesa/state_tracker/st_cb_program.c | 14 + src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4431 ++++++++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.h | 66 + src/mesa/state_tracker/st_mesa_to_tgsi.c | 4 +- src/mesa/state_tracker/st_mesa_to_tgsi.h | 6 + src/mesa/state_tracker/st_program.c | 399 +- src/mesa/state_tracker/st_program.h | 27 + 8 files changed, 4767 insertions(+), 183 deletions(-) create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi.cpp create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi.h diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak index 4b2ec08bbb0..ed008f8813e 100644 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@ -336,7 +336,8 @@ MESA_GALLIUM_SOURCES = \ MESA_GALLIUM_CXX_SOURCES = \ $(MAIN_CXX_SOURCES) \ - $(SHADER_CXX_SOURCES) + $(SHADER_CXX_SOURCES) \ + state_tracker/st_glsl_to_tgsi.cpp # All the core C sources, for dependency checking ALL_SOURCES = \ diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 32694975d17..2abb4d8f082 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -44,6 +44,7 @@ #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "st_cb_program.h" +#include "st_glsl_to_tgsi.h" @@ -129,6 +130,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) { struct st_vertex_program *stvp = (struct st_vertex_program *) prog; st_release_vp_variants( st, stvp ); + + if (stvp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi); } break; case MESA_GEOMETRY_PROGRAM: @@ -137,6 +141,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) (struct st_geometry_program *) 
prog; st_release_gp_variants(st, stgp); + + if (stgp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi); if (stgp->tgsi.tokens) { st_free_tokens((void *) stgp->tgsi.tokens); @@ -151,6 +158,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) st_release_fp_variants(st, stfp); + if (stfp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi); + if (stfp->tgsi.tokens) { st_free_tokens(stfp->tgsi.tokens); stfp->tgsi.tokens = NULL; @@ -242,4 +252,8 @@ st_init_program_functions(struct dd_function_table *functions) functions->DeleteProgram = st_delete_program; functions->IsProgramNative = st_is_program_native; functions->ProgramStringNotify = st_program_string_notify; + + functions->NewShader = st_new_shader; + functions->NewShaderProgram = st_new_shader_program; + functions->LinkShader = st_link_shader; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp new file mode 100644 index 00000000000..e1102503ee0 --- /dev/null +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -0,0 +1,4431 @@ +/* + * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. + * Copyright © 2010 Intel Corporation + * Copyright © 2011 Bryan Cain + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file glsl_to_tgsi.cpp + * + * Translate GLSL IR to Mesa's gl_program representation and to TGSI. + */ + +#include +#include "main/compiler.h" +#include "ir.h" +#include "ir_visitor.h" +#include "ir_print_visitor.h" +#include "ir_expression_flattening.h" +#include "glsl_types.h" +#include "glsl_parser_extras.h" +#include "../glsl/program.h" +#include "ir_optimization.h" +#include "ast.h" + +extern "C" { +#include "main/mtypes.h" +#include "main/shaderapi.h" +#include "main/shaderobj.h" +#include "main/uniforms.h" +#include "program/hash_table.h" +#include "program/prog_instruction.h" +#include "program/prog_optimize.h" +#include "program/prog_print.h" +#include "program/program.h" +#include "program/prog_uniform.h" +#include "program/prog_parameter.h" +#include "program/sampler.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "util/u_math.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_dump.h" +#include "st_context.h" +#include "st_program.h" +#include "st_glsl_to_tgsi.h" +#include "st_mesa_to_tgsi.h" + +#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ + (1 << PROGRAM_ENV_PARAM) | \ + (1 << PROGRAM_STATE_VAR) | \ + (1 << PROGRAM_NAMED_PARAM) | \ + (1 << PROGRAM_CONSTANT) | \ + (1 << PROGRAM_UNIFORM)) +} + +class st_src_reg; +class st_dst_reg; + +static int swizzle_for_size(int size); + +/** + * This struct is a corresponding struct to Mesa 
prog_src_register, with + * wider fields. + */ +class st_src_reg { +public: + st_src_reg(gl_register_file file, int index, const glsl_type *type) + { + this->file = file; + this->index = index; + if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) + this->swizzle = swizzle_for_size(type->vector_elements); + else + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = NULL; + } + + st_src_reg() + { + this->file = PROGRAM_UNDEFINED; + this->index = 0; + this->swizzle = 0; + this->negate = 0; + this->reladdr = NULL; + } + + explicit st_src_reg(st_dst_reg reg); + + gl_register_file file; /**< PROGRAM_* from Mesa */ + int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ + int negate; /**< NEGATE_XYZW mask from mesa */ + /** Register index should be offset by the integer in this reg. */ + st_src_reg *reladdr; +}; + +class st_dst_reg { +public: + st_dst_reg(gl_register_file file, int writemask) + { + this->file = file; + this->index = 0; + this->writemask = writemask; + this->cond_mask = COND_TR; + this->reladdr = NULL; + } + + st_dst_reg() + { + this->file = PROGRAM_UNDEFINED; + this->index = 0; + this->writemask = 0; + this->cond_mask = COND_TR; + this->reladdr = NULL; + } + + explicit st_dst_reg(st_src_reg reg); + + gl_register_file file; /**< PROGRAM_* from Mesa */ + int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ + GLuint cond_mask:4; + /** Register index should be offset by the integer in this reg. 
*/ + st_src_reg *reladdr; +}; + +st_src_reg::st_src_reg(st_dst_reg reg) +{ + this->file = reg.file; + this->index = reg.index; + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = NULL; +} + +st_dst_reg::st_dst_reg(st_src_reg reg) +{ + this->file = reg.file; + this->index = reg.index; + this->writemask = WRITEMASK_XYZW; + this->cond_mask = COND_TR; + this->reladdr = reg.reladdr; +} + +class glsl_to_tgsi_instruction : public exec_node { +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + enum prog_opcode op; + st_dst_reg dst; + st_src_reg src[3]; + /** Pointer to the ir source this tree came from for debugging */ + ir_instruction *ir; + GLboolean cond_update; + bool saturate; + int sampler; /**< sampler index */ + int tex_target; /**< One of TEXTURE_*_INDEX */ + GLboolean tex_shadow; + + class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */ +}; + +class variable_storage : public exec_node { +public: + variable_storage(ir_variable *var, gl_register_file file, int index) + : file(file), index(index), var(var) + { + /* empty */ + } + + gl_register_file file; + int index; + ir_variable *var; /* variable that maps to this, if any */ +}; + +class function_entry : public exec_node { +public: + ir_function_signature *sig; + + /** + * identifier of this function signature used by the program. + * + * At the point that Mesa instructions for function calls are + * generated, we don't know the address of the first instruction of + * the function body. So we make the BranchTarget that is called a + * small integer and rewrite them during set_branchtargets(). + */ + int sig_id; + + /** + * Pointer to first instruction of the function body. + * + * Set during function body emits after main() is processed. 
+ */ + glsl_to_tgsi_instruction *bgn_inst; + + /** + * Index of the first instruction of the function body in actual + * Mesa IR. + * + * Set after convertion from glsl_to_tgsi_instruction to prog_instruction. + */ + int inst; + + /** Storage for the return value. */ + st_src_reg return_reg; +}; + +class glsl_to_tgsi_visitor : public ir_visitor { +public: + glsl_to_tgsi_visitor(); + ~glsl_to_tgsi_visitor(); + + function_entry *current_function; + + struct gl_context *ctx; + struct gl_program *prog; + struct gl_shader_program *shader_program; + struct gl_shader_compiler_options *options; + + int next_temp; + + int num_address_regs; + bool indirect_addr_temps; + bool indirect_addr_consts; + + variable_storage *find_variable_storage(ir_variable *var); + + function_entry *get_function_signature(ir_function_signature *sig); + + st_src_reg get_temp(const glsl_type *type); + void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); + + st_src_reg st_src_reg_for_float(float val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. 
+ */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + st_src_reg result; + + /** List of variable_storage */ + exec_list variables; + + /** List of function_entry */ + exec_list function_signatures; + int next_signature_id; + + /** List of glsl_to_tgsi_instruction */ + exec_list instructions; + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, st_src_reg src2); + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + void emit_dp(ir_instruction *ir, + st_dst_reg dst, + st_src_reg src0, + st_src_reg src1, + unsigned elements); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1); + + void emit_scs(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, const st_src_reg &src); + + GLboolean try_emit_mad(ir_expression *ir, + int mul_operand); + GLboolean 
try_emit_sat(ir_expression *ir); + + void emit_swz(ir_expression *ir); + + bool process_move_condition(ir_rvalue *ir); + + void rename_temp_register(int index, int new_index); + int get_first_temp_read(int index); + int get_first_temp_write(int index); + int get_last_temp_read(int index); + int get_last_temp_write(int index); + + void copy_propagate(void); + void eliminate_dead_code(void); + void merge_registers(void); + void renumber_registers(void); + + void *mem_ctx; +}; + +static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL); + +static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); + +static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); + +static void +fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); + +static void +fail_link(struct gl_shader_program *prog, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + ralloc_vasprintf_append(&prog->InfoLog, fmt, args); + va_end(args); + + prog->LinkStatus = GL_FALSE; +} + +static int +swizzle_for_size(int size) +{ + int size_swizzles[4] = { + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), + }; + + assert((size >= 1) && (size <= 4)); + return size_swizzles[size - 1]; +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, st_src_reg src2) +{ + glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); + int num_reladdr = 0, i; + + /* If we have to do relative addressing, we want to load the ARL + * reg directly for one of the regs, and preload the other reladdr + * sources into temps. 
+ */ + num_reladdr += dst.reladdr != NULL; + num_reladdr += src0.reladdr != NULL; + num_reladdr += src1.reladdr != NULL; + num_reladdr += src2.reladdr != NULL; + + reladdr_to_temp(ir, &src2, &num_reladdr); + reladdr_to_temp(ir, &src1, &num_reladdr); + reladdr_to_temp(ir, &src0, &num_reladdr); + + if (dst.reladdr) { + emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); + num_reladdr--; + } + assert(num_reladdr == 0); + + inst->op = op; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + inst->ir = ir; + + inst->function = NULL; + + if (op == OPCODE_ARL) + this->num_address_regs = 1; + + /* Update indirect addressing status used by TGSI: a relative destination is classified by dst.file, a relative source by its own register file. */ + if (dst.reladdr) { + switch(dst.file) { + case PROGRAM_TEMPORARY: + this->indirect_addr_temps = true; + break; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + this->indirect_addr_consts = true; + break; + default: + break; + } + } + else { + for (i=0; i<3; i++) { + if(inst->src[i].reladdr) { + switch(inst->src[i].file) { + case PROGRAM_TEMPORARY: + this->indirect_addr_temps = true; + break; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + this->indirect_addr_consts = true; + break; + default: + break; + } + } + } + } + + this->instructions.push_tail(inst); + + return inst; +} + + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1) +{ + return emit(ir, op, dst, src0, src1, undef_src); +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0) +{ + assert(dst.writemask != 0); + return emit(ir, op, dst, src0, undef_src, undef_src); +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum
prog_opcode op) +{ + return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); +} + +void +glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, + st_dst_reg dst, st_src_reg src0, st_src_reg src1, + unsigned elements) +{ + static const gl_inst_opcode dot_opcodes[] = { + OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 + }; + + emit(ir, dot_opcodes[elements - 2], dst, src0, src1); +} + +/** + * Emits Mesa scalar opcodes to produce unique answers across channels. + * + * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X + * channel determines the result across all channels. So to do a vec4 + * of this operation, we want to emit a scalar per source channel used + * to produce dest channels. + */ +void +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, + st_src_reg orig_src0, st_src_reg orig_src1) +{ + int i, j; + int done_mask = ~dst.writemask; + + /* Mesa RCP is a scalar operation splatting results to all channels, + * like ARB_fp/vp. So emit as many RCPs as necessary to cover our + * dst channels. + */ + for (i = 0; i < 4; i++) { + GLuint this_mask = (1 << i); + glsl_to_tgsi_instruction *inst; + st_src_reg src0 = orig_src0; + st_src_reg src1 = orig_src1; + + if (done_mask & this_mask) + continue; + + GLuint src0_swiz = GET_SWZ(src0.swizzle, i); + GLuint src1_swiz = GET_SWZ(src1.swizzle, i); + for (j = i + 1; j < 4; j++) { + /* If there is another enabled component in the destination that is + * derived from the same inputs, generate its value on this pass as + * well. 
+ */ + if (!(done_mask & (1 << j)) && + GET_SWZ(src0.swizzle, j) == src0_swiz && + GET_SWZ(src1.swizzle, j) == src1_swiz) { + this_mask |= (1 << j); + } + } + src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, + src0_swiz, src0_swiz); + src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, + src1_swiz, src1_swiz); + + inst = emit(ir, op, dst, src0, src1); + inst->dst.writemask = this_mask; + done_mask |= this_mask; + } +} + +void +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0) +{ + st_src_reg undef = undef_src; + + undef.swizzle = SWIZZLE_XXXX; + + emit_scalar(ir, op, dst, src0, undef); +} + +/** + * Emit an OPCODE_SCS instruction + * + * The \c SCS opcode functions a bit differently than the other Mesa (or + * ARB_fragment_program) opcodes. Instead of splatting its result across all + * four components of the destination, it writes one value to the \c x + * component and another value to the \c y component. + * + * \param ir IR instruction being processed + * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which + * value is desired. + * \param dst Destination register + * \param src Source register + */ +void +glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, + const st_src_reg &src) +{ + /* Vertex programs cannot use the SCS opcode. + */ + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { + emit_scalar(ir, op, dst, src); + return; + } + + const unsigned component = (op == OPCODE_SIN) ? 0 : 1; + const unsigned scs_mask = (1U << component); + int done_mask = ~dst.writemask; + st_src_reg tmp; + + assert(op == OPCODE_SIN || op == OPCODE_COS); + + /* If there are compnents in the destination that differ from the component + * that will be written by the SCS instrution, we'll need a temporary. 
+ */ + if (scs_mask != unsigned(dst.writemask)) { + tmp = get_temp(glsl_type::vec4_type); + } + + for (unsigned i = 0; i < 4; i++) { + unsigned this_mask = (1U << i); + st_src_reg src0 = src; + + if ((done_mask & this_mask) != 0) + continue; + + /* The source swizzle specifies which component of the source generates + * sine / cosine for the current component in the destination. The SCS + * instruction requires that this value be swizzled to the X component. + * Replace the current swizzle with a swizzle that puts the source in + * the X component. + */ + unsigned src0_swiz = GET_SWZ(src.swizzle, i); + + src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, + src0_swiz, src0_swiz); + for (unsigned j = i + 1; j < 4; j++) { + /* If another enabled destination component reads the same source + * component (checked against the caller's swizzle, because src0.swizzle + * is already splatted), generate its value on this pass as well. + */ + if (!(done_mask & (1 << j)) && + GET_SWZ(src.swizzle, j) == src0_swiz) { + this_mask |= (1 << j); + } + } + + if (this_mask != scs_mask) { + glsl_to_tgsi_instruction *inst; + st_dst_reg tmp_dst = st_dst_reg(tmp); + + /* Emit the SCS instruction. + */ + inst = emit(ir, OPCODE_SCS, tmp_dst, src0); + inst->dst.writemask = scs_mask; + + /* Move the result of the SCS instruction to the desired location in + * the destination. + */ + tmp.swizzle = MAKE_SWIZZLE4(component, component, + component, component); + inst = emit(ir, OPCODE_MOV, dst, tmp); + inst->dst.writemask = this_mask; + } else { + /* Emit the SCS instruction to write directly to the destination.
+ */ + glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0); + inst->dst.writemask = scs_mask; + } + + done_mask |= this_mask; + } +} + +struct st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_float(float val) +{ + st_src_reg src(PROGRAM_CONSTANT, -1, NULL); + + src.index = _mesa_add_unnamed_constant(this->prog->Parameters, + &val, 1, &src.swizzle); + + return src; +} + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + assert(type->length > 0); + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up one slot in UNIFORMS[], but they're baked in + * at link time. + */ + return 1; + default: + assert(0); + return 0; + } +} + +/** + * In the initial pass of codegen, we assign temporary numbers to + * intermediate results. (not SSA -- variable assignments will reuse + * storage). Actual register allocation for the Mesa VM occurs in a + * pass over the Mesa IR later. 
+ */ +st_src_reg +glsl_to_tgsi_visitor::get_temp(const glsl_type *type) +{ + st_src_reg src; + int swizzle[4]; + int i; + + src.file = PROGRAM_TEMPORARY; + src.index = next_temp; + src.reladdr = NULL; + next_temp += type_size(type); + + if (type->is_array() || type->is_record()) { + src.swizzle = SWIZZLE_NOOP; + } else { + for (i = 0; i < type->vector_elements; i++) + swizzle[i] = i; + for (; i < 4; i++) + swizzle[i] = type->vector_elements - 1; + src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], + swizzle[2], swizzle[3]); + } + src.negate = 0; + + return src; +} + +variable_storage * +glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) +{ + + variable_storage *entry; + + foreach_iter(exec_list_iterator, iter, this->variables) { + entry = (variable_storage *)iter.get(); + + if (entry->var == var) + return entry; + } + + return NULL; +} + +void +glsl_to_tgsi_visitor::visit(ir_variable *ir) +{ + if (strcmp(ir->name, "gl_FragCoord") == 0) { + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + + fp->OriginUpperLeft = ir->origin_upper_left; + fp->PixelCenterInteger = ir->pixel_center_integer; + + } else if (strcmp(ir->name, "gl_FragDepth") == 0) { + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + switch (ir->depth_layout) { + case ir_depth_layout_none: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; + break; + case ir_depth_layout_any: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; + break; + case ir_depth_layout_greater: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case ir_depth_layout_less: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; + break; + case ir_depth_layout_unchanged: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + default: + assert(0); + break; + } + } + + if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { + unsigned int i; + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); + + /* Check if this 
statevar's setup in the STATE file exactly + * matches how we'll want to reference it as a + * struct/array/whatever. If not, then we need to move it into + * temporary storage and hope that it'll get copy-propagated + * out. + */ + for (i = 0; i < ir->num_state_slots; i++) { + if (slots[i].swizzle != SWIZZLE_XYZW) { + break; + } + } + + struct variable_storage *storage; + st_dst_reg dst; + if (i == ir->num_state_slots) { + /* We'll set the index later. */ + storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); + this->variables.push_tail(storage); + + dst = undef_dst; + } else { + /* The variable_storage constructor allocates slots based on the size + * of the type. However, this had better match the number of state + * elements that we're going to copy into the new temporary. + */ + assert((int) ir->num_state_slots == type_size(ir->type)); + + storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(storage); + this->next_temp += type_size(ir->type); + + dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); + } + + + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + int index = _mesa_add_state_reference(this->prog->Parameters, + (gl_state_index *)slots[i].tokens); + + if (storage->file == PROGRAM_STATE_VAR) { + if (storage->index == -1) { + storage->index = index; + } else { + assert(index == storage->index + (int)i); + } + } else { + st_src_reg src(PROGRAM_STATE_VAR, index, NULL); + src.swizzle = slots[i].swizzle; + emit(ir, OPCODE_MOV, dst, src); + /* even a float takes up a whole vec4 reg in a struct/array. 
*/ + dst.index++; + } + } + + if (storage->file == PROGRAM_TEMPORARY && + dst.index != storage->index + (int) ir->num_state_slots) { + fail_link(this->shader_program, + "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", + ir->name, dst.index - storage->index, + type_size(ir->type)); + } + } +} + +void +glsl_to_tgsi_visitor::visit(ir_loop *ir) +{ + ir_dereference_variable *counter = NULL; + + if (ir->counter != NULL) + counter = new(ir) ir_dereference_variable(ir->counter); + + if (ir->from != NULL) { + assert(ir->counter != NULL); + + ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + + a->accept(this); + delete a; + } + + emit(NULL, OPCODE_BGNLOOP); + + if (ir->to) { + ir_expression *e = + new(ir) ir_expression(ir->cmp, glsl_type::bool_type, + counter, ir->to); + ir_if *if_stmt = new(ir) ir_if(e); + + ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + + if_stmt->then_instructions.push_tail(brk); + + if_stmt->accept(this); + + delete if_stmt; + delete e; + delete brk; + } + + visit_exec_list(&ir->body_instructions, this); + + if (ir->increment) { + ir_expression *e = + new(ir) ir_expression(ir_binop_add, counter->type, + counter, ir->increment); + + ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); + + a->accept(this); + delete a; + delete e; + } + + emit(NULL, OPCODE_ENDLOOP); +} + +void +glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(NULL, OPCODE_BRK); + break; + case ir_loop_jump::jump_continue: + emit(NULL, OPCODE_CONT); + break; + } +} + + +void +glsl_to_tgsi_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +glsl_to_tgsi_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined before we get to glsl_to_tgsi. 
+ */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + foreach_iter(exec_list_iterator, iter, sig->body) { + ir_instruction *ir = (ir_instruction *)iter.get(); + + ir->accept(this); + } + } +} + +GLboolean +glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) +{ + int nonmul_operand = 1 - mul_operand; + st_src_reg a, b, c; + + ir_expression *expr = ir->operands[mul_operand]->as_expression(); + if (!expr || expr->operation != ir_binop_mul) + return false; + + expr->operands[0]->accept(this); + a = this->result; + expr->operands[1]->accept(this); + b = this->result; + ir->operands[nonmul_operand]->accept(this); + c = this->result; + + this->result = get_temp(ir->type); + emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c); + + return true; +} + +GLboolean +glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) +{ + /* Saturates were only introduced to vertex programs in + * NV_vertex_program3, so don't give them to drivers in the VP. + */ + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) + return false; + + ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); + if (!sat_src) + return false; + + sat_src->accept(this); + st_src_reg src = this->result; + + this->result = get_temp(ir->type); + glsl_to_tgsi_instruction *inst; + inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src); + inst->saturate = true; + + return true; +} + +void +glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, + st_src_reg *reg, int *num_reladdr) +{ + if (!reg->reladdr) + return; + + emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); + + if (*num_reladdr != 1) { + st_src_reg temp = get_temp(glsl_type::vec4_type); + + emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg); + *reg = temp; + } + + (*num_reladdr)--; +} + +void +glsl_to_tgsi_visitor::emit_swz(ir_expression *ir) +{ + /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. 
+ * This means that each of the operands is either an immediate value of -1, + * 0, or 1, or is a component from one source register (possibly with + * negation). + */ + uint8_t components[4] = { 0 }; + bool negate[4] = { false }; + ir_variable *var = NULL; + + for (unsigned i = 0; i < ir->type->vector_elements; i++) { + ir_rvalue *op = ir->operands[i]; + + assert(op->type->is_scalar()); + + while (op != NULL) { + switch (op->ir_type) { + case ir_type_constant: { + + assert(op->type->is_scalar()); + + const ir_constant *const c = op->as_constant(); + if (c->is_one()) { + components[i] = SWIZZLE_ONE; + } else if (c->is_zero()) { + components[i] = SWIZZLE_ZERO; + } else if (c->is_negative_one()) { + components[i] = SWIZZLE_ONE; + negate[i] = true; + } else { + assert(!"SWZ constant must be 0.0 or 1.0."); + } + + op = NULL; + break; + } + + case ir_type_dereference_variable: { + ir_dereference_variable *const deref = + (ir_dereference_variable *) op; + + assert((var == NULL) || (deref->var == var)); + components[i] = SWIZZLE_X; + var = deref->var; + op = NULL; + break; + } + + case ir_type_expression: { + ir_expression *const expr = (ir_expression *) op; + + assert(expr->operation == ir_unop_neg); + negate[i] = true; + + op = expr->operands[0]; + break; + } + + case ir_type_swizzle: { + ir_swizzle *const swiz = (ir_swizzle *) op; + + components[i] = swiz->mask.x; + op = swiz->val; + break; + } + + default: + assert(!"Should not get here."); + return; + } + } + } + + assert(var != NULL); + + ir_dereference_variable *const deref = + new(mem_ctx) ir_dereference_variable(var); + + this->result.file = PROGRAM_UNDEFINED; + deref->accept(this); + if (this->result.file == PROGRAM_UNDEFINED) { + ir_print_visitor v; + printf("Failed to get tree for expression operand:\n"); + deref->accept(&v); + exit(1); + } + + st_src_reg src; + + src = this->result; + src.swizzle = MAKE_SWIZZLE4(components[0], + components[1], + components[2], + components[3]); + src.negate = 
((unsigned(negate[0]) << 0) + | (unsigned(negate[1]) << 1) + | (unsigned(negate[2]) << 2) + | (unsigned(negate[3]) << 3)); + + /* Storage for our result. Ideally for an assignment we'd be using the + * actual storage for the result here, instead. + */ + const st_src_reg result_src = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(result_src); + + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + emit(ir, OPCODE_SWZ, result_dst, src); + this->result = result_src; +} + +void +glsl_to_tgsi_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + st_src_reg op[Elements(ir->operands)]; + st_src_reg result_src; + st_dst_reg result_dst; + + /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) + */ + if (ir->operation == ir_binop_add) { + if (try_emit_mad(ir, 1)) + return; + if (try_emit_mad(ir, 0)) + return; + } + if (try_emit_sat(ir)) + return; + + if (ir->operation == ir_quadop_vector) { + this->emit_swz(ir); + return; + } + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = PROGRAM_UNDEFINED; + ir->operands[operand]->accept(this); + if (this->result.file == PROGRAM_UNDEFINED) { + ir_print_visitor v; + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->accept(&v); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + int vector_elements = ir->operands[0]->type->vector_elements; + if (ir->operands[1]) { + vector_elements = MAX2(vector_elements, + ir->operands[1]->type->vector_elements); + } + + this->result.file = PROGRAM_UNDEFINED; + + /* Storage for our result. 
Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(ir->type); + /* convenience for the emit functions below. */ + result_dst = st_dst_reg(result_src); + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); + break; + case ir_unop_neg: + op[0].negate = ~op[0].negate; + result_src = op[0]; + break; + case ir_unop_abs: + emit(ir, OPCODE_ABS, result_dst, op[0]); + break; + case ir_unop_sign: + emit(ir, OPCODE_SSG, result_dst, op[0]); + break; + case ir_unop_rcp: + emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_log2: + emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); + break; + case ir_unop_sin: + emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + emit_scalar(ir, OPCODE_COS, result_dst, op[0]); + break; + case ir_unop_sin_reduced: + emit_scs(ir, OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos_reduced: + emit_scs(ir, OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + emit(ir, OPCODE_DDX, result_dst, op[0]); + break; + case ir_unop_dFdy: + emit(ir, OPCODE_DDY, result_dst, op[0]); + break; + + case ir_unop_noise: { + const enum prog_opcode opcode = + prog_opcode(OPCODE_NOISE1 + + (ir->operands[0]->type->vector_elements) - 1); + assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4)); + + emit(ir, opcode, result_dst, op[0]); + break; + } + + case ir_binop_add: + emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + break; + case 
ir_binop_sub: + emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); + break; + + case ir_binop_mul: + emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); + break; + case ir_binop_greater: + emit(ir, OPCODE_SGT, result_dst, op[0], op[1]); + break; + case ir_binop_lequal: + emit(ir, OPCODE_SLE, result_dst, op[0], op[1]); + break; + case ir_binop_gequal: + emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); + break; + case ir_binop_equal: + emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + break; + case ir_binop_nequal: + emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + break; + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + st_src_reg temp = get_temp(glsl_type::vec4_type); + emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit_dp(ir, result_dst, temp, temp, vector_elements); + emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); + } else { + emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. 
*/ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + st_src_reg temp = get_temp(glsl_type::vec4_type); + emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit_dp(ir, result_dst, temp, temp, vector_elements); + emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + } else { + emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + } + break; + + case ir_unop_any: + assert(ir->operands[0]->type->is_vector()); + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + break; + + case ir_binop_logic_xor: + emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: + /* This could be a saturated add and skip the SNE. */ + emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + break; + + case ir_binop_logic_and: + /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ + emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(ir, result_dst, op[0], op[1], + ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + /* sqrt(x) = x * rsq(x). */ + emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); + emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); + /* For incoming channels <= 0, set the result to 0. */ + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_CMP, result_dst, + op[0], result_src, st_src_reg_for_float(0.0)); + break; + case ir_unop_rsq: + emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_b2f: + case ir_unop_b2i: + /* Mesa IR lacks types, ints are stored as truncated floats. 
*/ + result_src = op[0]; + break; + case ir_unop_f2i: + emit(ir, OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: + emit(ir, OPCODE_SNE, result_dst, + op[0], st_src_reg_for_float(0.0)); + break; + case ir_unop_trunc: + emit(ir, OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_FLR, result_dst, op[0]); + result_src.negate = ~result_src.negate; + break; + case ir_unop_floor: + emit(ir, OPCODE_FLR, result_dst, op[0]); + break; + case ir_unop_fract: + emit(ir, OPCODE_FRC, result_dst, op[0]); + break; + + case ir_binop_min: + emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); + break; + case ir_binop_max: + emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); + break; + case ir_binop_pow: + emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + case ir_unop_u2f: + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + case ir_unop_round_even: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + /* This operation should have already been handled. + */ + assert(!"Should not get here."); + break; + } + + this->result = result_src; +} + + +void +glsl_to_tgsi_visitor::visit(ir_swizzle *ir) +{ + st_src_reg src; + int i; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. 
+ */ + + ir->val->accept(this); + src = this->result; + assert(src.file != PROGRAM_UNDEFINED); + + for (i = 0; i < 4; i++) { + if (i < ir->type->vector_elements) { + switch (i) { + case 0: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } else { + /* If the type is smaller than a vec4, replicate the last + * channel out. + */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + } + + src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) +{ + variable_storage *entry = find_variable_storage(ir->var); + ir_variable *var = ir->var; + + if (!entry) { + switch (var->mode) { + case ir_var_uniform: + entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, + var->location); + this->variables.push_tail(entry); + break; + case ir_var_in: + case ir_var_inout: + /* The linker assigns locations for varyings and attributes, + * including deprecated builtins (like gl_Color), user-assign + * generic attributes (glBindVertexLocation), and + * user-defined varyings. + * + * FINISHME: We would hit this path for function arguments. Fix! 
+ */ + assert(var->location != -1); + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + var->location); + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && + var->location >= VERT_ATTRIB_GENERIC0) { + _mesa_add_attribute(this->prog->Attributes, + var->name, + _mesa_sizeof_glsl_type(var->type->gl_type), + var->type->gl_type, + var->location - VERT_ATTRIB_GENERIC0); + } + break; + case ir_var_out: + assert(var->location != -1); + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + var->location); + break; + case ir_var_system_value: + entry = new(mem_ctx) variable_storage(var, + PROGRAM_SYSTEM_VALUE, + var->location); + break; + case ir_var_auto: + case ir_var_temporary: + entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(entry); + + next_temp += type_size(var->type); + break; + } + + if (!entry) { + printf("Failed to make storage for %s\n", var->name); + exit(1); + } + } + + this->result = st_src_reg(entry->file, entry->index, var->type); +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *index; + st_src_reg src; + int element_size = type_size(ir->type); + + index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (index) { + src.index += index->value.i[0] * element_size; + } else { + st_src_reg array_base = this->result; + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the Mesa register + * index. 
+ */ + ir->array_index->accept(this); + + st_src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = get_temp(glsl_type::float_type); + + emit(ir, OPCODE_MUL, st_dst_reg(index_reg), + this->result, st_src_reg_for_float(element_size)); + } + + src.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = SWIZZLE_NOOP; + + this->result = src; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = SWIZZLE_NOOP; + + this->result.index += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static st_dst_reg +get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) +{ + /* The LHS must be a dereference. If the LHS is a variable indexed array + * access of a vector, it must be separated into a series conditional moves + * before reaching this point (see ir_vec_index_to_cond_assign). 
+ */ + assert(ir->as_dereference()); + ir_dereference_array *deref_array = ir->as_dereference_array(); + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + return st_dst_reg(v->result); +} + +/** + * Process the condition of a conditional assignment + * + * Examines the condition of a conditional assignment to generate the optimal + * first operand of a \c CMP instruction. If the condition is a relational + * operator with 0 (e.g., \c ir_binop_less), the value being compared will be + * used as the source for the \c CMP instruction. Otherwise the comparison + * is processed to a boolean result, and the boolean result is used as the + * operand to the CMP instruction. + */ +bool +glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) +{ + ir_rvalue *src_ir = ir; + bool negate = true; + bool switch_order = false; + + ir_expression *const expr = ir->as_expression(); + if ((expr != NULL) && (expr->get_num_operands() == 2)) { + bool zero_on_left = false; + + if (expr->operands[0]->is_zero()) { + src_ir = expr->operands[1]; + zero_on_left = true; + } else if (expr->operands[1]->is_zero()) { + src_ir = expr->operands[0]; + zero_on_left = false; + } + + /* a is - 0 + - 0 + + * (a < 0) T F F ( a < 0) T F F + * (0 < a) F F T (-a < 0) F F T + * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) + * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) + * (a > 0) F F T (-a < 0) F F T + * (0 > a) T F F ( a < 0) T F F + * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) + * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) + * + * Note that exchanging the order of 0 and 'a' in the comparison simply + * means that the value of 'a' should be negated. 
+ */ + if (src_ir != ir) { + switch (expr->operation) { + case ir_binop_less: + switch_order = false; + negate = zero_on_left; + break; + + case ir_binop_greater: + switch_order = false; + negate = !zero_on_left; + break; + + case ir_binop_lequal: + switch_order = true; + negate = !zero_on_left; + break; + + case ir_binop_gequal: + switch_order = true; + negate = zero_on_left; + break; + + default: + /* This isn't the right kind of comparison afterall, so make sure + * the whole condition is visited. + */ + src_ir = ir; + break; + } + } + } + + src_ir->accept(this); + + /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the + * condition we produced is 0.0 or 1.0. By flipping the sign, we can + * choose which value OPCODE_CMP produces without an extra instruction + * computing the condition. + */ + if (negate) + this->result.negate = ~this->result.negate; + + return switch_order; +} + +void +glsl_to_tgsi_visitor::visit(ir_assignment *ir) +{ + st_dst_reg l; + st_src_reg r; + int i; + + ir->rhs->accept(this); + r = this->result; + + l = get_assignment_lhs(ir->lhs, this); + + /* FINISHME: This should really set to the correct maximal writemask for each + * FINISHME: component written (in the loops below). This case can only + * FINISHME: occur for matrices, arrays, and structures. + */ + if (ir->write_mask == 0) { + assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); + l.writemask = WRITEMASK_XYZW; + } else if (ir->lhs->type->is_scalar()) { + /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the + * FINISHME: W component of fragment shader output zero, work correctly. 
+ */ + l.writemask = WRITEMASK_XYZW; + } else { + int swizzles[4]; + int first_enabled_chan = 0; + int rhs_chan = 0; + + assert(ir->lhs->type->is_vector()); + l.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (l.writemask & (1 << i)) { + first_enabled_chan = GET_SWZ(r.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while Mesa IR treats write_mask as just + * showing which channels of the vec4 RHS get written. + */ + for (int i = 0; i < 4; i++) { + if (l.writemask & (1 << i)) + swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); + else + swizzles[i] = first_enabled_chan; + } + r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + } + + assert(l.file != PROGRAM_UNDEFINED); + assert(r.file != PROGRAM_UNDEFINED); + + if (ir->condition) { + const bool switch_order = this->process_move_condition(ir->condition); + st_src_reg condition = this->result; + + for (i = 0; i < type_size(ir->lhs->type); i++) { + if (switch_order) { + emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r); + } else { + emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l)); + } + + l.index++; + r.index++; + } + } else { + for (i = 0; i < type_size(ir->lhs->type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } +} + + +void +glsl_to_tgsi_visitor::visit(ir_constant *ir) +{ + st_src_reg src; + GLfloat stack_vals[4] = { 0 }; + GLfloat *values = stack_vals; + unsigned int i; + + /* Unfortunately, 4 floats is all we can get into + * _mesa_add_unnamed_constant. So, make a temp to store an + * aggregate constant and move each constant value into it. If we + * get lucky, copy propagation will eliminate the extra moves. 
+ */ + + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + st_src_reg temp_base = get_temp(ir->type); + st_dst_reg temp = st_dst_reg(temp_base); + + foreach_iter(exec_list_iterator, iter, ir->components) { + ir_constant *field_value = (ir_constant *)iter.get(); + int size = type_size(field_value->type); + + assert(size > 0); + + field_value->accept(this); + src = this->result; + + for (i = 0; i < (unsigned int)size; i++) { + emit(ir, OPCODE_MOV, temp, src); + + src.index++; + temp.index++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_array()) { + st_src_reg temp_base = get_temp(ir->type); + st_dst_reg temp = st_dst_reg(temp_base); + int size = type_size(ir->type->fields.array); + + assert(size > 0); + + for (i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + src = this->result; + for (int j = 0; j < size; j++) { + emit(ir, OPCODE_MOV, temp, src); + + src.index++; + temp.index++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_matrix()) { + st_src_reg mat = get_temp(ir->type); + st_dst_reg mat_column = st_dst_reg(mat); + + for (i = 0; i < ir->type->matrix_columns; i++) { + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + values = &ir->value.f[i * ir->type->vector_elements]; + + src = st_src_reg(PROGRAM_CONSTANT, -1, NULL); + src.index = _mesa_add_unnamed_constant(this->prog->Parameters, + values, + ir->type->vector_elements, + &src.swizzle); + emit(ir, OPCODE_MOV, mat_column, src); + + mat_column.index++; + } + + this->result = mat; + return; + } + + src.file = PROGRAM_CONSTANT; + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + values = &ir->value.f[0]; + break; + case GLSL_TYPE_UINT: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = ir->value.u[i]; + } + break; + case GLSL_TYPE_INT: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = ir->value.i[i]; + } + break; + case GLSL_TYPE_BOOL: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = 
ir->value.b[i]; + } + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + + this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); + this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, + values, + ir->type->vector_elements, + &this->result.swizzle); +} + +function_entry * +glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) +{ + function_entry *entry; + + foreach_iter(exec_list_iterator, iter, this->function_signatures) { + entry = (function_entry *)iter.get(); + + if (entry->sig == sig) + return entry; + } + + entry = ralloc(mem_ctx, function_entry); + entry->sig = sig; + entry->sig_id = this->next_signature_id++; + entry->bgn_inst = NULL; + + /* Allocate storage for all the parameters. */ + foreach_iter(exec_list_iterator, iter, sig->parameters) { + ir_variable *param = (ir_variable *)iter.get(); + variable_storage *storage; + + storage = find_variable_storage(param); + assert(!storage); + + storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(storage); + + this->next_temp += type_size(param->type); + } + + if (!sig->return_type->is_void()) { + entry->return_reg = get_temp(sig->return_type); + } else { + entry->return_reg = undef_src; + } + + this->function_signatures.push_tail(entry); + return entry; +} + +void +glsl_to_tgsi_visitor::visit(ir_call *ir) +{ + glsl_to_tgsi_instruction *call_inst; + ir_function_signature *sig = ir->get_callee(); + function_entry *entry = get_function_signature(sig); + int i; + + /* Process in parameters. 
*/ + exec_list_iterator sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_in || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + param_rval->accept(this); + st_src_reg r = this->result; + + st_dst_reg l; + l.file = storage->file; + l.index = storage->index; + l.reladdr = NULL; + l.writemask = WRITEMASK_XYZW; + l.cond_mask = COND_TR; + + for (i = 0; i < type_size(param->type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Emit call instruction */ + call_inst = emit(ir, OPCODE_CAL); + call_inst->function = entry; + + /* Process out parameters. */ + sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_out || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + st_src_reg r; + r.file = storage->file; + r.index = storage->index; + r.reladdr = NULL; + r.swizzle = SWIZZLE_NOOP; + r.negate = 0; + + param_rval->accept(this); + st_dst_reg l = st_dst_reg(this->result); + + for (i = 0; i < type_size(param->type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Process return value. */ + this->result = entry->return_reg; +} + +void +glsl_to_tgsi_visitor::visit(ir_texture *ir) +{ + st_src_reg result_src, coord, lod_info, projector, dx, dy; + st_dst_reg result_dst, coord_dst; + glsl_to_tgsi_instruction *inst = NULL; + prog_opcode opcode = OPCODE_NOP; + + ir->coordinate->accept(this); + + /* Put our coords in a temp. 
We'll need to modify them for shadow, + * projection, or LOD, so the only case we'd use it as is is if + * we're doing plain old texturing. Mesa IR optimization should + * handle cleaning up our mess in that case. + */ + coord = get_temp(glsl_type::vec4_type); + coord_dst = st_dst_reg(coord); + emit(ir, OPCODE_MOV, coord_dst, this->result); + + if (ir->projector) { + ir->projector->accept(this); + projector = this->result; + } + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(glsl_type::vec4_type); + result_dst = st_dst_reg(result_src); + + switch (ir->op) { + case ir_tex: + opcode = OPCODE_TEX; + break; + case ir_txb: + opcode = OPCODE_TXB; + ir->lod_info.bias->accept(this); + lod_info = this->result; + break; + case ir_txl: + opcode = OPCODE_TXL; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + case ir_txd: + opcode = OPCODE_TXD; + ir->lod_info.grad.dPdx->accept(this); + dx = this->result; + ir->lod_info.grad.dPdy->accept(this); + dy = this->result; + break; + case ir_txf: // TODO: use TGSI_OPCODE_TXF here + assert(!"GLSL 1.30 features unsupported"); + break; + } + + if (ir->projector) { + if (opcode == OPCODE_TEX) { + /* Slot the projector in as the last component of the coord. */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, OPCODE_MOV, coord_dst, projector); + coord_dst.writemask = WRITEMASK_XYZW; + opcode = OPCODE_TXP; + } else { + st_src_reg coord_w = coord; + coord_w.swizzle = SWIZZLE_WWWW; + + /* For the other TEX opcodes there's no projective version + * since the last slot is taken up by lod info. Do the + * projective divide now. + */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, OPCODE_RCP, coord_dst, projector); + + /* In the case where we have to project the coordinates "by hand," + * the shadow comparitor value must also be projected. 
+ */ + st_src_reg tmp_src = coord; + if (ir->shadow_comparitor) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + + tmp_src = get_temp(glsl_type::vec4_type); + st_dst_reg tmp_dst = st_dst_reg(tmp_src); + + tmp_dst.writemask = WRITEMASK_Z; + emit(ir, OPCODE_MOV, tmp_dst, this->result); + + tmp_dst.writemask = WRITEMASK_XY; + emit(ir, OPCODE_MOV, tmp_dst, coord); + } + + coord_dst.writemask = WRITEMASK_XYZ; + emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); + + coord_dst.writemask = WRITEMASK_XYZW; + coord.swizzle = SWIZZLE_XYZW; + } + } + + /* If projection is done and the opcode is not OPCODE_TXP, then the shadow + * comparitor was put in the correct place (and projected) by the code, + * above, that handles by-hand projection. + */ + if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + coord_dst.writemask = WRITEMASK_Z; + emit(ir, OPCODE_MOV, coord_dst, this->result); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { + /* Mesa IR stores lod or lod bias in the last channel of the coords. */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, OPCODE_MOV, coord_dst, lod_info); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == OPCODE_TXD) + inst = emit(ir, opcode, result_dst, coord, dx, dy); + else + inst = emit(ir, opcode, result_dst, coord); + + if (ir->shadow_comparitor) + inst->tex_shadow = GL_TRUE; + + inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, + this->shader_program, + this->prog); + + const glsl_type *sampler_type = ir->sampler->type; + + switch (sampler_type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + inst->tex_target = (sampler_type->sampler_array) + ? 
TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + break; + case GLSL_SAMPLER_DIM_2D: + inst->tex_target = (sampler_type->sampler_array) + ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + break; + case GLSL_SAMPLER_DIM_3D: + inst->tex_target = TEXTURE_3D_INDEX; + break; + case GLSL_SAMPLER_DIM_CUBE: + inst->tex_target = TEXTURE_CUBE_INDEX; + break; + case GLSL_SAMPLER_DIM_RECT: + inst->tex_target = TEXTURE_RECT_INDEX; + break; + case GLSL_SAMPLER_DIM_BUF: + assert(!"FINISHME: Implement ARB_texture_buffer_object"); + break; + default: + assert(!"Should not get here."); + } + + this->result = result_src; +} + +void +glsl_to_tgsi_visitor::visit(ir_return *ir) +{ + if (ir->get_value()) { + st_dst_reg l; + int i; + + assert(current_function); + + ir->get_value()->accept(this); + st_src_reg r = this->result; + + l = st_dst_reg(current_function->return_reg); + + for (i = 0; i < type_size(current_function->sig->return_type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + emit(ir, OPCODE_RET); +} + +void +glsl_to_tgsi_visitor::visit(ir_discard *ir) +{ + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + + if (ir->condition) { + ir->condition->accept(this); + this->result.negate = ~this->result.negate; + emit(ir, OPCODE_KIL, undef_dst, this->result); + } else { + emit(ir, OPCODE_KIL_NV); + } + + fp->UsesKill = GL_TRUE; +} + +void +glsl_to_tgsi_visitor::visit(ir_if *ir) +{ + glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL; + glsl_to_tgsi_instruction *prev_inst; + + prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + + ir->condition->accept(this); + assert(this->result.file != PROGRAM_UNDEFINED); + + if (this->options->EmitCondCodes) { + cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + + /* See if we actually generated any instruction for generating + * the condition. If not, then cook up a move to a temp so we + * have something to set cond_update on. 
+ */ + if (cond_inst == prev_inst) { + st_src_reg temp = get_temp(glsl_type::bool_type); + cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result); + } + cond_inst->cond_update = GL_TRUE; + + if_inst = emit(ir->condition, OPCODE_IF); + if_inst->dst.cond_mask = COND_NE; + } else { + if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); + } + + this->instructions.push_tail(if_inst); + + visit_exec_list(&ir->then_instructions, this); + + if (!ir->else_instructions.is_empty()) { + else_inst = emit(ir->condition, OPCODE_ELSE); + visit_exec_list(&ir->else_instructions, this); + } + + if_inst = emit(ir->condition, OPCODE_ENDIF); +} + +glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() +{ + result.file = PROGRAM_UNDEFINED; + next_temp = 1; + next_signature_id = 1; + current_function = NULL; + num_address_regs = 0; + indirect_addr_temps = false; + indirect_addr_consts = false; + mem_ctx = ralloc_context(NULL); +} + +glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() +{ + ralloc_free(mem_ctx); +} + +extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) +{ + delete v; +} + +static struct prog_src_register +mesa_st_src_reg_from_ir_st_src_reg(st_src_reg reg) +{ + struct prog_src_register mesa_reg; + + mesa_reg.File = reg.file; + assert(reg.index < (1 << INST_INDEX_BITS)); + mesa_reg.Index = reg.index; + mesa_reg.Swizzle = reg.swizzle; + mesa_reg.RelAddr = reg.reladdr != NULL; + mesa_reg.Negate = reg.negate; + mesa_reg.Abs = 0; + mesa_reg.HasIndex2 = GL_FALSE; + mesa_reg.RelAddr2 = 0; + mesa_reg.Index2 = 0; + + return mesa_reg; +} + +static void +set_branchtargets(glsl_to_tgsi_visitor *v, + struct prog_instruction *mesa_instructions, + int num_instructions) +{ + int if_count = 0, loop_count = 0; + int *if_stack, *loop_stack; + int if_stack_pos = 0, loop_stack_pos = 0; + int i, j; + + for (i = 0; i < num_instructions; i++) { + switch (mesa_instructions[i].Opcode) { + case OPCODE_IF: + if_count++; + break; + case OPCODE_BGNLOOP: + loop_count++; + 
break; + case OPCODE_BRK: + case OPCODE_CONT: + mesa_instructions[i].BranchTarget = -1; + break; + default: + break; + } + } + + if_stack = rzalloc_array(v->mem_ctx, int, if_count); + loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); + + for (i = 0; i < num_instructions; i++) { + switch (mesa_instructions[i].Opcode) { + case OPCODE_IF: + if_stack[if_stack_pos] = i; + if_stack_pos++; + break; + case OPCODE_ELSE: + mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; + if_stack[if_stack_pos - 1] = i; + break; + case OPCODE_ENDIF: + mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; + if_stack_pos--; + break; + case OPCODE_BGNLOOP: + loop_stack[loop_stack_pos] = i; + loop_stack_pos++; + break; + case OPCODE_ENDLOOP: + loop_stack_pos--; + /* Rewrite any breaks/conts at this nesting level (haven't + * already had a BranchTarget assigned) to point to the end + * of the loop. + */ + for (j = loop_stack[loop_stack_pos]; j < i; j++) { + if (mesa_instructions[j].Opcode == OPCODE_BRK || + mesa_instructions[j].Opcode == OPCODE_CONT) { + if (mesa_instructions[j].BranchTarget == -1) { + mesa_instructions[j].BranchTarget = i; + } + } + } + /* The loop ends point at each other. 
*/ + mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; + mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; + break; + case OPCODE_CAL: + foreach_iter(exec_list_iterator, iter, v->function_signatures) { + function_entry *entry = (function_entry *)iter.get(); + + if (entry->sig_id == mesa_instructions[i].BranchTarget) { + mesa_instructions[i].BranchTarget = entry->inst; + break; + } + } + break; + default: + break; + } + } +} + +static void +print_program(struct prog_instruction *mesa_instructions, + ir_instruction **mesa_instruction_annotation, + int num_instructions) +{ + /*ir_instruction *last_ir = NULL;*/ + int i; + int indent = 0; + + for (i = 0; i < num_instructions; i++) { + struct prog_instruction *mesa_inst = mesa_instructions + i; + + fprintf(stdout, "%3d: ", i); + +#if 0 +/* Disable this for now, since printing GLSL IR along with its corresponding + * Mesa IR makes the Mesa IR unreadable. */ + ir_instruction *ir = mesa_instruction_annotation[i]; + if (last_ir != ir && ir) { + int j; + + for (j = 0; j < indent; j++) { + fprintf(stdout, " "); + } + ir->print(); + printf("\n"); + last_ir = ir; + + fprintf(stdout, " "); /* line number spacing. */ + } +#endif + + indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, + PROG_PRINT_DEBUG, NULL); + } +} + + +/** + * Count resources used by the given gpu program (number of texture + * samplers, etc). 
+ */ +static void +count_resources(struct gl_program *prog) +{ + unsigned int i; + + prog->SamplersUsed = 0; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = &prog->Instructions[i]; + + if (_mesa_is_tex_instruction(inst->Opcode)) { + prog->SamplerTargets[inst->TexSrcUnit] = + (gl_texture_index)inst->TexSrcTarget; + prog->SamplersUsed |= 1 << inst->TexSrcUnit; + if (inst->TexShadow) { + prog->ShadowSamplers |= 1 << inst->TexSrcUnit; + } + } + } + + _mesa_update_shader_textures_used(prog); +} + + +/** + * Check if the given vertex/fragment/shader program is within the + * resource limits of the context (number of texture units, etc). + * If any of those checks fail, record a linker error. + * + * XXX more checks are needed... + */ +static void +check_resources(const struct gl_context *ctx, + struct gl_shader_program *shader_program, + struct gl_program *prog) +{ + switch (prog->Target) { + case GL_VERTEX_PROGRAM_ARB: + if (_mesa_bitcount(prog->SamplersUsed) > + ctx->Const.MaxVertexTextureImageUnits) { + fail_link(shader_program, "Too many vertex shader texture samplers"); + } + if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + fail_link(shader_program, "Too many vertex shader constants"); + } + break; + case MESA_GEOMETRY_PROGRAM: + if (_mesa_bitcount(prog->SamplersUsed) > + ctx->Const.MaxGeometryTextureImageUnits) { + fail_link(shader_program, "Too many geometry shader texture samplers"); + } + if (prog->Parameters->NumParameters > + MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { + fail_link(shader_program, "Too many geometry shader constants"); + } + break; + case GL_FRAGMENT_PROGRAM_ARB: + if (_mesa_bitcount(prog->SamplersUsed) > + ctx->Const.MaxTextureImageUnits) { + fail_link(shader_program, "Too many fragment shader texture samplers"); + } + if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + fail_link(shader_program, "Too many fragment shader constants"); + } + break; + default: + _mesa_problem(ctx, "unexpected program type 
in check_resources()"); + } +} + + + +struct uniform_sort { + struct gl_uniform *u; + int pos; +}; + +/* The shader_program->Uniforms list is almost sorted in increasing + * uniform->{Frag,Vert}Pos locations, but not quite when there are + * uniforms shared between targets. We need to add parameters in + * increasing order for the targets. + */ +static int +sort_uniforms(const void *a, const void *b) +{ + struct uniform_sort *u1 = (struct uniform_sort *)a; + struct uniform_sort *u2 = (struct uniform_sort *)b; + + return u1->pos - u2->pos; +} + +/* Add the uniforms to the parameters. The linker chose locations + * in our parameters lists (which weren't created yet), which the + * uniforms code will use to poke values into our parameters list + * when uniforms are updated. + */ +static void +add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, + struct gl_shader *shader, + struct gl_program *prog) +{ + unsigned int i; + unsigned int next_sampler = 0, num_uniforms = 0; + struct uniform_sort *sorted_uniforms; + + sorted_uniforms = ralloc_array(NULL, struct uniform_sort, + shader_program->Uniforms->NumUniforms); + + for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { + struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; + int parameter_index = -1; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + parameter_index = uniform->VertPos; + break; + case GL_FRAGMENT_SHADER: + parameter_index = uniform->FragPos; + break; + case GL_GEOMETRY_SHADER: + parameter_index = uniform->GeomPos; + break; + } + + /* Only add uniforms used in our target. 
*/ + if (parameter_index != -1) { + sorted_uniforms[num_uniforms].pos = parameter_index; + sorted_uniforms[num_uniforms].u = uniform; + num_uniforms++; + } + } + + qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), + sort_uniforms); + + for (i = 0; i < num_uniforms; i++) { + struct gl_uniform *uniform = sorted_uniforms[i].u; + int parameter_index = sorted_uniforms[i].pos; + const glsl_type *type = uniform->Type; + unsigned int size; + + if (type->is_vector() || + type->is_scalar()) { + size = type->vector_elements; + } else { + size = type_size(type) * 4; + } + + gl_register_file file; + if (type->is_sampler() || + (type->is_array() && type->fields.array->is_sampler())) { + file = PROGRAM_SAMPLER; + } else { + file = PROGRAM_UNIFORM; + } + + GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, + uniform->Name); + + if (index < 0) { + index = _mesa_add_parameter(prog->Parameters, file, + uniform->Name, size, type->gl_type, + NULL, NULL, 0x0); + + /* Sampler uniform values are stored in prog->SamplerUnits, + * and the entry in that array is selected by this index we + * store in ParameterValues[]. + */ + if (file == PROGRAM_SAMPLER) { + for (unsigned int j = 0; j < size / 4; j++) + prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + } + + /* The location chosen in the Parameters list here (returned + * from _mesa_add_uniform) has to match what the linker chose. 
+ */ + if (index != parameter_index) { + fail_link(shader_program, "Allocation of uniform `%s' to target " + "failed (%d vs %d)\n", + uniform->Name, index, parameter_index); + } + } + } + + ralloc_free(sorted_uniforms); +} + +static void +set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, + struct gl_shader_program *shader_program, + const char *name, const glsl_type *type, + ir_constant *val) +{ + if (type->is_record()) { + ir_constant *field_constant; + + field_constant = (ir_constant *)val->components.get_head(); + + for (unsigned int i = 0; i < type->length; i++) { + const glsl_type *field_type = type->fields.structure[i].type; + const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, + type->fields.structure[i].name); + set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, + field_type, field_constant); + field_constant = (ir_constant *)field_constant->next; + } + return; + } + + int loc = _mesa_get_uniform_location(ctx, shader_program, name); + + if (loc == -1) { + fail_link(shader_program, + "Couldn't find uniform for initializer %s\n", name); + return; + } + + for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { + ir_constant *element; + const glsl_type *element_type; + if (type->is_array()) { + element = val->array_elements[i]; + element_type = type->fields.array; + } else { + element = val; + element_type = type; + } + + void *values; + + if (element_type->base_type == GLSL_TYPE_BOOL) { + int *conv = ralloc_array(mem_ctx, int, element_type->components()); + for (unsigned int j = 0; j < element_type->components(); j++) { + conv[j] = element->value.b[j]; + } + values = (void *)conv; + element_type = glsl_type::get_instance(GLSL_TYPE_INT, + element_type->vector_elements, + 1); + } else { + values = &element->value; + } + + if (element_type->is_matrix()) { + _mesa_uniform_matrix(ctx, shader_program, + element_type->matrix_columns, + element_type->vector_elements, + loc, 1, GL_FALSE, (GLfloat *)values); + loc += element_type->matrix_columns; + } else { + _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, + values, element_type->gl_type); + loc += type_size(element_type); + } + } +} + +static void +set_uniform_initializers(struct gl_context *ctx, + struct gl_shader_program *shader_program) +{ + void *mem_ctx = NULL; + + for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) { + struct gl_shader *shader = shader_program->_LinkedShaders[i]; + + if (shader == NULL) + continue; + + foreach_iter(exec_list_iterator, iter, *shader->ir) { + ir_instruction *ir = (ir_instruction *)iter.get(); + ir_variable *var = ir->as_variable(); + + if (!var || var->mode != ir_var_uniform || !var->constant_value) + continue; + + if (!mem_ctx) + mem_ctx = ralloc_context(NULL); + + set_uniform_initializer(ctx, mem_ctx, shader_program, var->name, + var->type, var->constant_value); + } + } + + ralloc_free(mem_ctx); +} + +/* Replaces all references to a temporary register index with another index. 
*/
+void
+glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
+{
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      unsigned j;
+
+      /* Patch every source operand that names the old temporary... */
+      for (j = 0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY &&
+             inst->src[j].index == index) {
+            inst->src[j].index = new_index;
+         }
+      }
+
+      /* ...and the destination as well. */
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
+         inst->dst.index = new_index;
+      }
+   }
+}
+
+/* Returns the position (counted from 0 in this->instructions) of the first
+ * instruction that reads temporary register `index`.  If that read happens
+ * inside a loop, the position of the outermost enclosing BGNLOOP is returned
+ * instead.  Returns -1 if the register is never read.
+ */
+int
+glsl_to_tgsi_visitor::get_first_temp_read(int index)
+{
+   int depth = 0; /* loop depth */
+   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
+   /* Signed on purpose: it is mixed with the -1 sentinel in loop_start, and
+    * the sibling *_write functions already use int.
+    */
+   int i = 0;
+   unsigned j;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      for (j = 0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY &&
+             inst->src[j].index == index) {
+            return (depth == 0) ? i : loop_start;
+         }
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         if (depth++ == 0)
+            loop_start = i;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         if (--depth == 0)
+            loop_start = -1;
+      }
+      assert(depth >= 0);
+
+      i++;
+   }
+
+   return -1;
+}
+
+/* Returns the position of the first instruction that writes temporary
+ * register `index`, or the position of the outermost enclosing BGNLOOP when
+ * that write is inside a loop.  Returns -1 if the register is never written.
+ */
+int
+glsl_to_tgsi_visitor::get_first_temp_write(int index)
+{
+   int depth = 0; /* loop depth */
+   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
+   int i = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
+         return (depth == 0) ? i : loop_start;
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         if (depth++ == 0)
+            loop_start = i;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         if (--depth == 0)
+            loop_start = -1;
+      }
+      assert(depth >= 0);
+
+      i++;
+   }
+
+   return -1;
+}
+
+/* Returns the position of the last instruction that reads temporary register
+ * `index`.  A read inside a loop is recorded as the placeholder -2 and is
+ * replaced by the position of the ENDLOOP that closes the outermost loop, so
+ * the register stays live for the whole loop.  Returns -1 if the register is
+ * never read.
+ */
+int
+glsl_to_tgsi_visitor::get_last_temp_read(int index)
+{
+   int depth = 0; /* loop depth */
+   int last = -1; /* index of last instruction that reads the temporary */
+   /* Signed on purpose: `(depth == 0) ? i : -2` must be evaluated as int so
+    * the -2 placeholder is not pushed through an unsigned conversion.
+    */
+   int i = 0;
+   unsigned j;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      for (j = 0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY &&
+             inst->src[j].index == index) {
+            last = (depth == 0) ? i : -2;
+         }
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         depth++;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         if (--depth == 0 && last == -2)
+            last = i;
+      }
+      assert(depth >= 0);
+
+      i++;
+   }
+
+   assert(last >= -1);
+   return last;
+}
+
+/* Returns the position of the last instruction that writes temporary register
+ * `index`, using the same "extend to the closing ENDLOOP" rule as
+ * get_last_temp_read().  Returns -1 if the register is never written.
+ */
+int
+glsl_to_tgsi_visitor::get_last_temp_write(int index)
+{
+   int depth = 0; /* loop depth */
+   int last = -1; /* index of last instruction that writes to the temporary */
+   int i = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
+         last = (depth == 0) ? i : -2;
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         depth++;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         if (--depth == 0 && last == -2)
+            last = i;
+      }
+      assert(depth >= 0);
+
+      i++;
+   }
+
+   assert(last >= -1);
+   return last;
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY register
+ * channels for copy propagation and updates following instructions to
+ * use the original versions.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.
As an example, a TXP production before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
+ *
+ * and after:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * which allows for dead code elimination on TEMP[1]'s writes.
+ *
+ * Bookkeeping used below:
+ *  - acp[4 * reg + chan] points at the MOV instruction that most recently
+ *    copied into channel `chan` of temporary `reg`, or is NULL when no
+ *    copy is currently available for that channel.
+ *  - acp_level[4 * reg + chan] is the value of `level` (the IF nesting
+ *    depth counted by this function) at the time that entry was recorded.
+ */
+void
+glsl_to_tgsi_visitor::copy_propagate(void)
+{
+   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
+                                                  glsl_to_tgsi_instruction *,
+                                                  this->next_temp * 4);
+   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+
+      /* First, do any copy propagation possible into the src regs. */
+      /* NOTE(review): the bound 3 is hard-coded rather than derived from the
+       * opcode; presumably it matches the size of inst->src[] -- confirm.
+       */
+      for (int r = 0; r < 3; r++) {
+         glsl_to_tgsi_instruction *first = NULL;
+         bool good = true;
+         /* Only used after the PROGRAM_TEMPORARY check below. */
+         int acp_base = inst->src[r].index * 4;
+
+         if (inst->src[r].file != PROGRAM_TEMPORARY ||
+             inst->src[r].reladdr)
+            continue;
+
+         /* See if we can find entries in the ACP consisting of MOVs
+          * from the same src register for all the swizzled channels
+          * of this src register reference.
+          */
+         for (int i = 0; i < 4; i++) {
+            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
+
+            if (!copy_chan) {
+               good = false;
+               break;
+            }
+
+            assert(acp_level[acp_base + src_chan] <= level);
+
+            if (!first) {
+               first = copy_chan;
+            } else {
+               if (first->src[0].file != copy_chan->src[0].file ||
+                   first->src[0].index != copy_chan->src[0].index) {
+                  good = false;
+                  break;
+               }
+            }
+         }
+
+         if (good) {
+            /* We've now validated that we can copy-propagate to
+             * replace this src register reference.  Do it.
+             */
+            inst->src[r].file = first->src[0].file;
+            inst->src[r].index = first->src[0].index;
+
+            /* Compose the two swizzles: for each channel read by this
+             * operand, look up which channel the recorded MOV read.
+             */
+            int swizzle = 0;
+            for (int i = 0; i < 4; i++) {
+               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
+               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
+                           (3 * i));
+            }
+            inst->src[r].swizzle = swizzle;
+         }
+      }
+
+      switch (inst->op) {
+      case OPCODE_BGNLOOP:
+      case OPCODE_ENDLOOP:
+         /* End of a basic block, clear the ACP entirely. */
+         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         break;
+
+      case OPCODE_IF:
+         ++level;
+         break;
+
+      case OPCODE_ENDIF:
+      case OPCODE_ELSE:
+         /* Clear all channels written inside the block from the ACP, but
+          * leaving those that were not touched.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!acp[4 * r + c])
+                  continue;
+
+               if (acp_level[4 * r + c] >= level)
+                  acp[4 * r + c] = NULL;
+            }
+         }
+         /* ELSE stays at the same nesting level; only ENDIF leaves it. */
+         if (inst->op == OPCODE_ENDIF)
+            --level;
+         break;
+
+      default:
+         /* Continuing the block, clear any written channels from
+          * the ACP.
+          */
+         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
+            /* Any temporary might be written, so no copy propagation
+             * across this instruction.
+             */
+            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         } else if (inst->dst.file == PROGRAM_OUTPUT &&
+                    inst->dst.reladdr) {
+            /* Any output might be written, so no copy propagation
+             * from outputs across this instruction.
+             */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+                  if (!acp[4 * r + c])
+                     continue;
+
+                  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
+                     acp[4 * r + c] = NULL;
+               }
+            }
+         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
+                    inst->dst.file == PROGRAM_OUTPUT) {
+            /* Clear where it's used as dst. */
+            if (inst->dst.file == PROGRAM_TEMPORARY) {
+               for (int c = 0; c < 4; c++) {
+                  if (inst->dst.writemask & (1 << c)) {
+                     acp[4 * inst->dst.index + c] = NULL;
+                  }
+               }
+            }
+
+            /* Clear where it's used as src. */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+                  if (!acp[4 * r + c])
+                     continue;
+
+                  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
+
+                  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
+                      acp[4 * r + c]->src[0].index == inst->dst.index &&
+                      inst->dst.writemask & (1 << src_chan))
+                  {
+                     acp[4 * r + c] = NULL;
+                  }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this is a copy, add it to the ACP. */
+      /* Only plain MOVs qualify: no relative addressing on either side, no
+       * saturate and no negate.
+       */
+      if (inst->op == OPCODE_MOV &&
+          inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate &&
+          !inst->src[0].reladdr &&
+          !inst->src[0].negate) {
+         for (int i = 0; i < 4; i++) {
+            if (inst->dst.writemask & (1 << i)) {
+               acp[4 * inst->dst.index + i] = inst;
+               acp_level[4 * inst->dst.index + i] = level;
+            }
+         }
+      }
+   }
+
+   ralloc_free(acp_level);
+   ralloc_free(acp);
+}
+
+/*
+ * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.
As an example, a TXP production after copy propagation but + * before this pass: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * and after this pass: + * + * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) + * FIXME: doesn't eliminate all dead code inside of loops; it steps around them + */ +void +glsl_to_tgsi_visitor::eliminate_dead_code(void) +{ + int i; + + for (i=0; i < this->next_temp; i++) { + int last_read = get_last_temp_read(i); + int j = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && + j > last_read) + { + iter.remove(); + delete inst; + } + + j++; + } + } +} + +/* Merges temporary registers together where possible to reduce the number of + * registers needed to run a program. + * + * Produces optimal code only after copy propagation and dead code elimination + * have been run. */ +void +glsl_to_tgsi_visitor::merge_registers(void) +{ + int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); + int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); + int i, j; + + /* Read the indices of the last read and first write to each temp register + * into an array so that we don't have to traverse the instruction list as + * much. */ + for (i=0; i < this->next_temp; i++) { + last_reads[i] = get_last_temp_read(i); + first_writes[i] = get_first_temp_write(i); + } + + /* Start looking for registers with non-overlapping usages that can be + * merged together. */ + for (i=0; i < this->next_temp - 1; i++) { + /* Don't touch unused registers. */ + if (last_reads[i] < 0 || first_writes[i] < 0) continue; + + for (j=i+1; j < this->next_temp; j++) { + /* Don't touch unused registers. 
*/ + if (last_reads[j] < 0 || first_writes[j] < 0) continue; + + /* We can merge the two registers if the first write to j is after or + * in the same instruction as the last read from i. Note that the + * register at index i will always be used earlier or at the same time + * as the register at index j. */ + assert(first_writes[i] <= first_writes[j]); + if (last_reads[i] <= first_writes[j]) { + rename_temp_register(j, i); /* Replace all references to j with i.*/ + + /* Update the first_writes and last_reads arrays with the new + * values for the merged register index, and mark the newly unused + * register index as such. */ + last_reads[i] = last_reads[j]; + first_writes[j] = -1; + last_reads[j] = -1; + } + } + } + + ralloc_free(last_reads); + ralloc_free(first_writes); +} + +/* Reassign indices to temporary registers by reusing unused indices created + * by optimization passes. */ +void +glsl_to_tgsi_visitor::renumber_registers(void) +{ + int i = 0; + int new_index = 0; + + for (i=0; i < this->next_temp; i++) { + if (get_first_temp_read(i) < 0) continue; + if (i != new_index) + rename_temp_register(i, new_index); + new_index++; + } + + this->next_temp = new_index; +} + +/* ------------------------- TGSI conversion stuff -------------------------- */ +struct label { + unsigned branch_target; + unsigned token; +}; + +/** + * Intermediate state used during shader translation. 
+ */
+struct st_translate {
+   struct ureg_program *ureg;
+
+   /* Temporaries are declared lazily: dst_register()/src_register() call
+    * ureg_DECL_temporary() the first time an undefined entry is used.
+    */
+   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
+   struct ureg_src *constants;
+   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_dst address[1];
+   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+
+   /* Extra info for handling point size clamping in vertex shader */
+   struct ureg_dst pointSizeResult; /**< Actual point size output register */
+   struct ureg_src pointSizeConst;  /**< Point size range constant register */
+   GLint pointSizeOutIndex;         /**< Temp point size output register */
+   GLboolean prevInstWrotePointSize;
+
+   /* Index translation tables used when looking up inputs[] / outputs[]. */
+   const GLuint *inputMapping;
+   const GLuint *outputMapping;
+
+   /* For every instruction that contains a label (eg CALL), keep
+    * details so that we can go back afterwards and emit the correct
+    * tgsi instruction number for each label.
+    */
+   struct label *labels;
+   unsigned labels_size;   /* allocated capacity of labels[], grown by get_label() */
+   unsigned labels_count;  /* number of entries of labels[] in use */
+
+   /* Keep a record of the tgsi instruction number that each mesa
+    * instruction starts at, will be used to fix up labels after
+    * translation.
+    */
+   unsigned *insn;
+   unsigned insn_size;     /* allocated capacity of insn[], grown by set_insn_start() */
+   unsigned insn_count;    /* number of entries of insn[] in use */
+
+   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
+
+   /* Set to TRUE by get_label()/set_insn_start() when realloc() fails. */
+   boolean error;
+};
+
+/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
+/* NOTE(review): the initializer order presumably follows the SYSTEM_VALUE_x
+ * enum order -- confirm against the enum when adding entries.
+ */
+static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
+   TGSI_SEMANTIC_FACE,
+   TGSI_SEMANTIC_INSTANCEID
+};
+
+/**
+ * Make note of a branch to a label in the TGSI code.
+ * After we've emitted all instructions, we'll go over the list
+ * of labels built here and patch the TGSI code with the actual
+ * location of each label.
+ */
+static unsigned *get_label( struct st_translate *t,
+                            unsigned branch_target )
+{
+   unsigned i;
+
+   if (t->labels_count + 1 >= t->labels_size) {
+      /* Grow to the next power of two.  The result of realloc() goes into a
+       * temporary so that a failed allocation neither leaks the existing
+       * array nor leaves labels_size describing memory we do not own.
+       */
+      unsigned new_size = 1 << (util_logbase2(t->labels_size) + 1);
+      struct label *new_labels =
+         (struct label *)realloc(t->labels, new_size * sizeof t->labels[0]);
+
+      if (new_labels == NULL) {
+         /* Out of memory: flag the error and hand back a scratch location
+          * so the caller still has something valid to write the token to.
+          */
+         static unsigned dummy;
+         t->error = TRUE;
+         return &dummy;
+      }
+
+      t->labels = new_labels;
+      t->labels_size = new_size;
+   }
+
+   i = t->labels_count++;
+   t->labels[i].branch_target = branch_target;
+   return &t->labels[i].token;
+}
+
+/**
+ * Called prior to emitting the TGSI code for each Mesa instruction.
+ * Allocate additional space for instructions if needed.
+ * Update the insn[] array so the next Mesa instruction points to
+ * the next TGSI instruction.
+ *
+ * On allocation failure t->error is set, the existing insn[] array is left
+ * untouched and \p start is not recorded.
+ */
+static void set_insn_start( struct st_translate *t,
+                            unsigned start )
+{
+   if (t->insn_count + 1 >= t->insn_size) {
+      /* Same growth scheme as get_label(): only commit the new pointer and
+       * size once realloc() has succeeded.
+       */
+      unsigned new_size = 1 << (util_logbase2(t->insn_size) + 1);
+      unsigned *new_insn =
+         (unsigned *)realloc(t->insn, new_size * sizeof t->insn[0]);
+
+      if (new_insn == NULL) {
+         t->error = TRUE;
+         return;
+      }
+
+      t->insn = new_insn;
+      t->insn_size = new_size;
+   }
+
+   t->insn[t->insn_count++] = start;
+}
+
+/**
+ * Map a Mesa dst register to a TGSI ureg_dst register.
+ */ +static struct ureg_dst +dst_register( struct st_translate *t, + gl_register_file file, + GLuint index ) +{ + switch( file ) { + case PROGRAM_UNDEFINED: + return ureg_dst_undef(); + + case PROGRAM_TEMPORARY: + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary( t->ureg ); + + return t->temps[index]; + + case PROGRAM_OUTPUT: + if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) + t->prevInstWrotePointSize = GL_TRUE; + + if (t->procType == TGSI_PROCESSOR_VERTEX) + assert(index < VERT_RESULT_MAX); + else if (t->procType == TGSI_PROCESSOR_FRAGMENT) + assert(index < FRAG_RESULT_MAX); + else + assert(index < GEOM_RESULT_MAX); + + assert(t->outputMapping[index] < Elements(t->outputs)); + + return t->outputs[t->outputMapping[index]]; + + case PROGRAM_ADDRESS: + return t->address[index]; + + default: + debug_assert( 0 ); + return ureg_dst_undef(); + } +} + +/** + * Map a Mesa src register to a TGSI ureg_src register. + */ +static struct ureg_src +src_register( struct st_translate *t, + gl_register_file file, + GLuint index ) +{ + switch( file ) { + case PROGRAM_UNDEFINED: + return ureg_src_undef(); + + case PROGRAM_TEMPORARY: + assert(index >= 0); + assert(index < Elements(t->temps)); + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary( t->ureg ); + return ureg_src(t->temps[index]); + + case PROGRAM_NAMED_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_UNIFORM: + assert(index >= 0); + return t->constants[index]; + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: /* ie, immediate */ + if (index < 0) + return ureg_DECL_constant( t->ureg, 0 ); + else + return t->constants[index]; + + case PROGRAM_INPUT: + assert(t->inputMapping[index] < Elements(t->inputs)); + return t->inputs[t->inputMapping[index]]; + + case PROGRAM_OUTPUT: + assert(t->outputMapping[index] < Elements(t->outputs)); + return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? 
*/ + + case PROGRAM_ADDRESS: + return ureg_src(t->address[index]); + + case PROGRAM_SYSTEM_VALUE: + assert(index < Elements(t->systemValues)); + return t->systemValues[index]; + + default: + debug_assert( 0 ); + return ureg_src_undef(); + } +} + +/** + * Create a TGSI ureg_dst register from a Mesa dest register. + */ +static struct ureg_dst +translate_dst( struct st_translate *t, + const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg, + boolean saturate ) +{ + struct ureg_dst dst = dst_register( t, + dst_reg->file, + dst_reg->index ); + + dst = ureg_writemask( dst, + dst_reg->writemask ); + + if (saturate) + dst = ureg_saturate( dst ); + + if (dst_reg->reladdr != NULL) + dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); + + return dst; +} + +/** + * Create a TGSI ureg_src register from a Mesa src register. + */ +static struct ureg_src +translate_src( struct st_translate *t, + const st_src_reg *src_reg ) +{ + struct ureg_src src = src_register( t, src_reg->file, src_reg->index ); + + src = ureg_swizzle( src, + GET_SWZ( src_reg->swizzle, 0 ) & 0x3, + GET_SWZ( src_reg->swizzle, 1 ) & 0x3, + GET_SWZ( src_reg->swizzle, 2 ) & 0x3, + GET_SWZ( src_reg->swizzle, 3 ) & 0x3); + + if ((src_reg->negate & 0xf) == NEGATE_XYZW) + src = ureg_negate(src); + +#if 0 + // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR + if (src_reg->abs) + src = ureg_abs(src); +#endif + + if (src_reg->reladdr != NULL) { + /* Normally ureg_src_indirect() would be used here, but a stupid compiler + * bug in g++ makes ureg_src_indirect (an inline C function) erroneously + * set the bit for src.Negate. So we have to do the operation manually + * here to work around the compiler's problems. 
*/ + /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ + struct ureg_src addr = ureg_src(t->address[0]); + src.Indirect = 1; + src.IndirectFile = addr.File; + src.IndirectIndex = addr.Index; + src.IndirectSwizzle = addr.SwizzleX; + + if (src_reg->file != PROGRAM_INPUT && + src_reg->file != PROGRAM_OUTPUT) { + /* If src_reg->index was negative, it was set to zero in + * src_register(). Reassign it now. But don't do this + * for input/output regs since they get remapped while + * const buffers don't. + */ + src.Index = src_reg->index; + } + } + + return src; +} + +static void +compile_tgsi_instruction(struct st_translate *t, + const struct glsl_to_tgsi_instruction *inst) +{ + struct ureg_program *ureg = t->ureg; + GLuint i; + struct ureg_dst dst[1]; + struct ureg_src src[4]; + unsigned num_dst; + unsigned num_src; + + num_dst = _mesa_num_inst_dst_regs( inst->op ); + num_src = _mesa_num_inst_src_regs( inst->op ); + + if (num_dst) + dst[0] = translate_dst( t, + &inst->dst, + inst->saturate); // inst->SaturateMode + + for (i = 0; i < num_src; i++) + src[i] = translate_src( t, &inst->src[i] ); + + switch( inst->op ) { + case OPCODE_SWZ: + // TODO: copy emit_swz function from st_mesa_to_tgsi.c + //emit_swz( t, dst[0], &inst->src[0] ); + assert(!"OPCODE_SWZ"); + return; + + case OPCODE_BGNLOOP: + case OPCODE_CAL: + case OPCODE_ELSE: + case OPCODE_ENDLOOP: + case OPCODE_IF: + debug_assert(num_dst == 0); + ureg_label_insn( ureg, + translate_opcode( inst->op ), + src, num_src, + get_label( t, + inst->op == OPCODE_CAL ? 
inst->function->sig_id : 0 )); + return; + + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXD: + case OPCODE_TXL: + case OPCODE_TXP: + src[num_src++] = t->samplers[inst->sampler]; + ureg_tex_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + translate_texture_target( inst->tex_target, + inst->tex_shadow ), + src, num_src ); + return; + + case OPCODE_SCS: + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); + ureg_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + src, num_src ); + break; + + case OPCODE_XPD: + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); + ureg_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + src, num_src ); + break; + + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: + /* At some point, a motivated person could add a better + * implementation of noise. Currently not even the nvidia + * binary drivers do anything more than this. In any case, the + * place to do this is in the GL state tracker, not the poor + * driver. + */ + ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); + break; + + case OPCODE_DDY: + // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c + assert(!"OPCODE_DDY"); + //emit_ddy( t, dst[0], &inst->src[0] ); + break; + + default: + ureg_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + src, num_src ); + break; + } +} + +/** + * Emit the TGSI instructions to adjust the WPOS pixel center convention + * Basically, add (adjX, adjY) to the fragment position. + */ +static void +emit_adjusted_wpos( struct st_translate *t, + const struct gl_program *program, + GLfloat adjX, GLfloat adjY) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + + /* Note that we bias X and Y and pass Z and W through unchanged. + * The shader might also use gl_FragCoord.w and .z. 
+ */ + ureg_ADD(ureg, wpos_temp, wpos_input, + ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); + + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); +} + + +/** + * Emit the TGSI instructions for inverting the WPOS y coordinate. + * This code is unavoidable because it also depends on whether + * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). + */ +static void +emit_wpos_inversion( struct st_translate *t, + const struct gl_program *program, + boolean invert) +{ + struct ureg_program *ureg = t->ureg; + + /* Fragment program uses fragment position input. + * Need to replace instances of INPUT[WPOS] with temp T + * where T = INPUT[WPOS] by y is inverted. + */ + static const gl_state_index wposTransformState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, + (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; + + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed: + */ + unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, + wposTransformState); + + struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_dst wpos_temp; + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + + /* MOV wpos_temp, input[wpos] + */ + if (wpos_input.File == TGSI_FILE_TEMPORARY) + wpos_temp = ureg_dst(wpos_input); + else { + wpos_temp = ureg_DECL_temporary( ureg ); + ureg_MOV( ureg, wpos_temp, wpos_input ); + } + + if (invert) { + /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy + */ + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); + } else { + /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww + */ + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); + } + + /* Use wpos_temp as position input from 
here on: + */ + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); +} + + +/** + * Emit fragment position/ooordinate code. + */ +static void +emit_wpos(struct st_context *st, + struct st_translate *t, + const struct gl_program *program, + struct ureg_program *ureg) +{ + const struct gl_fragment_program *fp = + (const struct gl_fragment_program *) program; + struct pipe_screen *pscreen = st->pipe->screen; + boolean invert = FALSE; + + if (fp->OriginUpperLeft) { + /* Fragment shader wants origin in upper-left */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { + /* the driver supports upper-left origin */ + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { + /* the driver supports lower-left origin, need to invert Y */ + ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + invert = TRUE; + } + else + assert(0); + } + else { + /* Fragment shader wants origin in lower-left */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) + /* the driver supports lower-left origin */ + ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) + /* the driver supports upper-left origin, need to invert Y */ + invert = TRUE; + else + assert(0); + } + + if (fp->PixelCenterInteger) { + /* Fragment shader wants pixel center integer */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) + /* the driver supports pixel center integer */ + ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) + /* the driver supports pixel center half integer, need to bias X,Y */ + emit_adjusted_wpos(t, program, 0.5f, invert ? 
0.5f : -0.5f); + else + assert(0); + } + else { + /* Fragment shader wants pixel center half integer */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { + /* the driver supports pixel center half integer */ + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { + /* the driver supports pixel center integer, need to bias X,Y */ + ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); + } + else + assert(0); + } + + /* we invert after adjustment so that we avoid the MOV to temporary, + * and reuse the adjustment ADD instead */ + emit_wpos_inversion(t, program, invert); +} + +/** + * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. + * \param program the program to translate + * \param numInputs number of input registers used + * \param inputMapping maps Mesa fragment program inputs to TGSI generic + * input indexes + * \param inputSemanticName the TGSI_SEMANTIC flag for each input + * \param inputSemanticIndex the semantic index (ex: which texcoord) for + * each input + * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input + * \param numOutputs number of output registers used + * \param outputMapping maps Mesa fragment program outputs to TGSI + * generic outputs + * \param outputSemanticName the TGSI_SEMANTIC flag for each output + * \param outputSemanticIndex the semantic index (ex: which texcoord) for + * each output + * + * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY + */ +extern "C" enum pipe_error +st_translate_program( + struct gl_context *ctx, + uint procType, + struct ureg_program *ureg, + glsl_to_tgsi_visitor *program, + const struct gl_program *proginfo, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + 
const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags ) +{ + struct st_translate translate, *t; + unsigned i; + enum pipe_error ret = PIPE_OK; + + assert(numInputs <= Elements(t->inputs)); + assert(numOutputs <= Elements(t->outputs)); + + t = &translate; + memset(t, 0, sizeof *t); + + t->procType = procType; + t->inputMapping = inputMapping; + t->outputMapping = outputMapping; + t->ureg = ureg; + t->pointSizeOutIndex = -1; + t->prevInstWrotePointSize = GL_FALSE; + + /*_mesa_print_program(program);*/ + + /* + * Declare input attributes. + */ + if (procType == TGSI_PROCESSOR_FRAGMENT) { + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_fs_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i], + interpMode[i]); + } + + if (proginfo->InputsRead & FRAG_BIT_WPOS) { + /* Must do this after setting up t->inputs, and before + * emitting constant references, below: + */ + printf("FRAG_BIT_WPOS\n"); + emit_wpos(st_context(ctx), t, proginfo, ureg); + } + + if (proginfo->InputsRead & FRAG_BIT_FACE) { + // TODO: uncomment + printf("FRAG_BIT_FACE\n"); + //emit_face_var( t, program ); + } + + /* + * Declare output attributes. 
+ */ + for (i = 0; i < numOutputs; i++) { + switch (outputSemanticName[i]) { + case TGSI_SEMANTIC_POSITION: + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_POSITION, /* Z / Depth */ + outputSemanticIndex[i] ); + + t->outputs[i] = ureg_writemask( t->outputs[i], + TGSI_WRITEMASK_Z ); + break; + case TGSI_SEMANTIC_STENCIL: + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_STENCIL, /* Stencil */ + outputSemanticIndex[i] ); + t->outputs[i] = ureg_writemask( t->outputs[i], + TGSI_WRITEMASK_Y ); + break; + case TGSI_SEMANTIC_COLOR: + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i] ); + break; + default: + debug_assert(0); + return PIPE_ERROR_BAD_INPUT; + } + } + } + else if (procType == TGSI_PROCESSOR_GEOMETRY) { + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_gs_input(ureg, + i, + inputSemanticName[i], + inputSemanticIndex[i]); + } + + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output( ureg, + outputSemanticName[i], + outputSemanticIndex[i] ); + } + } + else { + assert(procType == TGSI_PROCESSOR_VERTEX); + + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_vs_input(ureg, i); + } + + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output( ureg, + outputSemanticName[i], + outputSemanticIndex[i] ); + if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { + /* Writing to the point size result register requires special + * handling to implement clamping. + */ + static const gl_state_index pointSizeClampState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed. 
+ * XXX: depends on "Parameters" field specific to Mesa IR + */ + unsigned pointSizeClampConst = + _mesa_add_state_reference(proginfo->Parameters, + pointSizeClampState); + struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); + t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); + t->pointSizeResult = t->outputs[i]; + t->pointSizeOutIndex = i; + t->outputs[i] = psizregtemp; + } + } + /*if (passthrough_edgeflags) + emit_edgeflags( t, program ); */ // TODO: uncomment + } + + /* Declare address register. + */ + if (program->num_address_regs > 0) { + debug_assert( program->num_address_regs == 1 ); + t->address[0] = ureg_DECL_address( ureg ); + } + + /* Declare misc input registers + */ + { + GLbitfield sysInputs = proginfo->SystemValuesRead; + unsigned numSys = 0; + for (i = 0; sysInputs; i++) { + if (sysInputs & (1 << i)) { + unsigned semName = mesa_sysval_to_semantic[i]; + t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); + numSys++; + sysInputs &= ~(1 << i); + } + } + } + + if (program->indirect_addr_temps) { + /* If temps are accessed with indirect addressing, declare temporaries + * in sequential order. Else, we declare them on demand elsewhere. + * (Note: the number of temporaries is equal to program->next_temp) + */ + for (i = 0; i < (unsigned)program->next_temp; i++) { + /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ + t->temps[i] = ureg_DECL_temporary( t->ureg ); + } + } + + /* Emit constants and immediates. Mesa uses a single index space + * for these, so we put all the translated regs in t->constants. 
+ * XXX: this entire if block depends on proginfo->Parameters from Mesa IR + */ + if (proginfo->Parameters) { + t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); + if (t->constants == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + + for (i = 0; i < proginfo->Parameters->NumParameters; i++) { + switch (proginfo->Parameters->Parameters[i].Type) { + case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_UNIFORM: + t->constants[i] = ureg_DECL_constant( ureg, i ); + break; + + /* Emit immediates only when there's no indirect addressing of + * the const buffer. + * FIXME: Be smarter and recognize param arrays: + * indirect addressing is only valid within the referenced + * array. + */ + case PROGRAM_CONSTANT: + if (program->indirect_addr_consts) + t->constants[i] = ureg_DECL_constant( ureg, i ); + else + t->constants[i] = + ureg_DECL_immediate( ureg, + proginfo->Parameters->ParameterValues[i], + 4 ); + break; + default: + break; + } + } + } + + /* texture samplers */ + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + // XXX: depends on SamplersUsed property generated by conversion to Mesa IR + if (proginfo->SamplersUsed & (1 << i)) { + t->samplers[i] = ureg_DECL_sampler( ureg, i ); + } + } + + /* Emit each instruction in turn: + */ + foreach_iter(exec_list_iterator, iter, program->instructions) { + set_insn_start( t, ureg_get_instruction_number( ureg )); + compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() ); + + if (t->prevInstWrotePointSize && proginfo->Id) { + /* The previous instruction wrote to the (fake) vertex point size + * result register. Now we need to clamp that value to the min/max + * point size range, putting the result into the real point size + * register. + * Note that we can't do this easily at the end of program due to + * possible early return. 
+ */ + set_insn_start( t, ureg_get_instruction_number( ureg )); + ureg_MAX( t->ureg, + ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 1,1,1,1)); + ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 2,2,2,2)); + } + t->prevInstWrotePointSize = GL_FALSE; + } + + /* Fix up all emitted labels: + */ + for (i = 0; i < t->labels_count; i++) { + ureg_fixup_label( ureg, + t->labels[i].token, + t->insn[t->labels[i].branch_target] ); + } + +out: + FREE(t->insn); + FREE(t->labels); + FREE(t->constants); + + if (t->error) { + debug_printf("%s: translate error flag set\n", __FUNCTION__); + } + + return ret; +} +/* ----------------------------- End TGSI code ------------------------------ */ + +/** + * Convert a shader's GLSL IR into both a Mesa gl_program and a TGSI shader. + */ +static struct gl_program * +get_mesa_program(struct gl_context *ctx, + struct gl_shader_program *shader_program, + struct gl_shader *shader) +{ + glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); + struct prog_instruction *mesa_instructions, *mesa_inst; + ir_instruction **mesa_instruction_annotation; + int i; + struct gl_program *prog; + GLenum target; + const char *target_string; + GLboolean progress; + struct gl_shader_compiler_options *options = + &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + target = GL_VERTEX_PROGRAM_ARB; + target_string = "vertex"; + break; + case GL_FRAGMENT_SHADER: + target = GL_FRAGMENT_PROGRAM_ARB; + target_string = "fragment"; + break; + case GL_GEOMETRY_SHADER: + target = GL_GEOMETRY_PROGRAM_NV; + target_string = "geometry"; + break; + default: + assert(!"should not be reached"); + return NULL; + } + + validate_ir_tree(shader->ir); + + prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); + 
if (!prog) + return NULL; + prog->Parameters = _mesa_new_parameter_list(); + prog->Varying = _mesa_new_parameter_list(); + prog->Attributes = _mesa_new_parameter_list(); + v->ctx = ctx; + v->prog = prog; + v->shader_program = shader_program; + v->options = options; + + add_uniforms_to_parameters_list(shader_program, shader, prog); + + /* Emit Mesa IR for main(). */ + visit_exec_list(shader->ir, v); + v->emit(NULL, OPCODE_END); + + /* Now emit bodies for any functions that were used. */ + do { + progress = GL_FALSE; + + foreach_iter(exec_list_iterator, iter, v->function_signatures) { + function_entry *entry = (function_entry *)iter.get(); + + if (!entry->bgn_inst) { + v->current_function = entry; + + entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB); + entry->bgn_inst->function = entry; + + visit_exec_list(&entry->sig->body, v); + + glsl_to_tgsi_instruction *last; + last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); + if (last->op != OPCODE_RET) + v->emit(NULL, OPCODE_RET); + + glsl_to_tgsi_instruction *end; + end = v->emit(NULL, OPCODE_ENDSUB); + end->function = entry; + + progress = GL_TRUE; + } + } + } while (progress); + +#if 0 + /* Print out some information (for debugging purposes) used by the + * optimization passes. */ + for (i=0; i < v->next_temp; i++) { + int fr = v->get_first_temp_read(i); + int fw = v->get_first_temp_write(i); + int lr = v->get_last_temp_read(i); + int lw = v->get_last_temp_write(i); + + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); + assert(fw <= fr); + } +#endif + + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 
*/ + v->copy_propagate(); + v->eliminate_dead_code(); + v->merge_registers(); + v->renumber_registers(); + + prog->NumTemporaries = v->next_temp; + + int num_instructions = 0; + foreach_iter(exec_list_iterator, iter, v->instructions) { + num_instructions++; + } + + mesa_instructions = + (struct prog_instruction *)calloc(num_instructions, + sizeof(*mesa_instructions)); + mesa_instruction_annotation = ralloc_array(v->mem_ctx, ir_instruction *, + num_instructions); + + /* Convert glsl_to_tgsi_instructions into Mesa IR prog_instructions. + * TODO: remove + */ + mesa_inst = mesa_instructions; + i = 0; + foreach_iter(exec_list_iterator, iter, v->instructions) { + const glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + mesa_inst->Opcode = inst->op; + mesa_inst->CondUpdate = inst->cond_update; + if (inst->saturate) + mesa_inst->SaturateMode = SATURATE_ZERO_ONE; + mesa_inst->DstReg.File = inst->dst.file; + mesa_inst->DstReg.Index = inst->dst.index; + mesa_inst->DstReg.CondMask = inst->dst.cond_mask; + mesa_inst->DstReg.WriteMask = inst->dst.writemask; + mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; + mesa_inst->SrcReg[0] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[0]); + mesa_inst->SrcReg[1] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[1]); + mesa_inst->SrcReg[2] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[2]); + mesa_inst->TexSrcUnit = inst->sampler; + mesa_inst->TexSrcTarget = inst->tex_target; + mesa_inst->TexShadow = inst->tex_shadow; + mesa_instruction_annotation[i] = inst->ir; + + /* Set IndirectRegisterFiles. 
*/ + if (mesa_inst->DstReg.RelAddr) + prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; + + /* Update program's bitmask of indirectly accessed register files */ + for (unsigned src = 0; src < 3; src++) + if (mesa_inst->SrcReg[src].RelAddr) + prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; + + if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { + fail_link(shader_program, "Couldn't flatten if statement\n"); + } + + switch (mesa_inst->Opcode) { + case OPCODE_BGNSUB: + inst->function->inst = i; + mesa_inst->Comment = strdup(inst->function->sig->function_name()); + break; + case OPCODE_ENDSUB: + mesa_inst->Comment = strdup(inst->function->sig->function_name()); + break; + case OPCODE_CAL: + mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */ + break; + case OPCODE_ARL: + prog->NumAddressRegs = 1; + break; + default: + break; + } + + mesa_inst++; + i++; + + if (!shader_program->LinkStatus) + break; + } + + if (!shader_program->LinkStatus) { + free(mesa_instructions); + _mesa_reference_program(ctx, &shader->Program, NULL); + return NULL; + } + + set_branchtargets(v, mesa_instructions, num_instructions); + + if (ctx->Shader.Flags & GLSL_DUMP) { + printf("\n"); + printf("GLSL IR for linked %s program %d:\n", target_string, + shader_program->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n"); + printf("\n"); + printf("Mesa IR for linked %s program %d:\n", target_string, + shader_program->Name); + print_program(mesa_instructions, mesa_instruction_annotation, + num_instructions); + } + + prog->Instructions = mesa_instructions; + prog->NumInstructions = num_instructions; + + do_set_program_inouts(shader->ir, prog); + count_resources(prog); + + check_resources(ctx, shader_program, prog); + + _mesa_reference_program(ctx, &shader->Program, prog); + + if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) { + _mesa_optimize_program(ctx, prog); + } + + struct st_vertex_program *stvp; + struct st_fragment_program *stfp; + struct 
st_geometry_program *stgp; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + stvp = (struct st_vertex_program *)prog; + stvp->glsl_to_tgsi = v; + break; + case GL_FRAGMENT_SHADER: + stfp = (struct st_fragment_program *)prog; + stfp->glsl_to_tgsi = v; + break; + case GL_GEOMETRY_SHADER: + stgp = (struct st_geometry_program *)prog; + stgp->glsl_to_tgsi = v; + break; + default: + assert(!"should not be reached"); + return NULL; + } + + return prog; +} + +extern "C" { + +struct gl_shader * +st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) +{ + struct gl_shader *shader; + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || + type == GL_GEOMETRY_SHADER_ARB); + shader = rzalloc(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Name = name; + _mesa_init_shader(ctx, shader); + } + return shader; +} + +struct gl_shader_program * +st_new_shader_program(struct gl_context *ctx, GLuint name) +{ + struct gl_shader_program *shProg; + shProg = rzalloc(NULL, struct gl_shader_program); + if (shProg) { + shProg->Name = name; + _mesa_init_shader_program(ctx, shProg); + } + return shProg; +} + +/** + * Link a shader. + * Called via ctx->Driver.LinkShader() + * This actually involves converting GLSL IR into Mesa gl_programs with + * code lowering and other optimizations. + */ +GLboolean +st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + assert(prog->LinkStatus); + + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + bool progress; + exec_list *ir = prog->_LinkedShaders[i]->ir; + const struct gl_shader_compiler_options *options = + &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; + + do { + progress = false; + + /* Lowering */ + do_mat_op_to_vec(ir); + lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 + | LOG_TO_LOG2 + | ((options->EmitNoPow) ? 
POW_TO_EXP2 : 0))); + + progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; + + progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; + + progress = lower_quadop_vector(ir, true) || progress; + + if (options->EmitNoIfs) { + progress = lower_discard(ir) || progress; + progress = lower_if_to_cond_assign(ir) || progress; + } + + if (options->EmitNoNoise) + progress = lower_noise(ir) || progress; + + /* If there are forms of indirect addressing that the driver + * cannot handle, perform the lowering pass. + */ + if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput + || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) + progress = + lower_variable_index_to_cond_assign(ir, + options->EmitNoIndirectInput, + options->EmitNoIndirectOutput, + options->EmitNoIndirectTemp, + options->EmitNoIndirectUniform) + || progress; + + progress = do_vec_index_to_cond_assign(ir) || progress; + } while (progress); + + validate_ir_tree(ir); + } + + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + struct gl_program *linked_prog; + + if (prog->_LinkedShaders[i] == NULL) + continue; + + linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); + + if (linked_prog) { + bool ok = true; + + switch (prog->_LinkedShaders[i]->Type) { + case GL_VERTEX_SHADER: + _mesa_reference_vertprog(ctx, &prog->VertexProgram, + (struct gl_vertex_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, + linked_prog); + break; + case GL_FRAGMENT_SHADER: + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, + (struct gl_fragment_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, + linked_prog); + break; + case GL_GEOMETRY_SHADER: + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, + (struct gl_geometry_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, 
GL_GEOMETRY_PROGRAM_NV, + linked_prog); + break; + } + if (!ok) { + return GL_FALSE; + } + } + + _mesa_reference_program(ctx, &linked_prog, NULL); + } + + return GL_TRUE; +} + + +/** + * Link a GLSL shader program. Called via glLinkProgram(). + */ +void +st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + unsigned int i; + + _mesa_clear_shader_program_data(ctx, prog); + + prog->LinkStatus = GL_TRUE; + + for (i = 0; i < prog->NumShaders; i++) { + if (!prog->Shaders[i]->CompileStatus) { + fail_link(prog, "linking with uncompiled shader"); + prog->LinkStatus = GL_FALSE; + } + } + + prog->Varying = _mesa_new_parameter_list(); + _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); + + if (prog->LinkStatus) { + link_shaders(ctx, prog); + } + + if (prog->LinkStatus) { + if (!ctx->Driver.LinkShader(ctx, prog)) { + prog->LinkStatus = GL_FALSE; + } + } + + set_uniform_initializers(ctx, prog); + + if (ctx->Shader.Flags & GLSL_DUMP) { + if (!prog->LinkStatus) { + printf("GLSL shader program %d failed to link\n", prog->Name); + } + + if (prog->InfoLog && prog->InfoLog[0] != 0) { + printf("GLSL shader program %d info log:\n", prog->Name); + printf("%s\n", prog->InfoLog); + } + } +} + +} /* extern "C" */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h new file mode 100644 index 00000000000..e21c0d1e0af --- /dev/null +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -0,0 +1,66 @@ +/* + * Copyright © 2010 Intel Corporation + * Copyright © 2011 Bryan Cain + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or 
sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "main/glheader.h" +#include "tgsi/tgsi_ureg.h" + +struct gl_context; +struct gl_shader; +struct gl_shader_program; +struct glsl_to_tgsi_visitor; + +enum pipe_error st_translate_program( + struct gl_context *ctx, + uint procType, + struct ureg_program *ureg, + struct glsl_to_tgsi_visitor *program, + const struct gl_program *proginfo, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags); + +void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); + +struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); + +struct gl_shader_program * +st_new_shader_program(struct gl_context *ctx, GLuint name); + +void st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); +GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); + +#ifdef __cplusplus +} +#endif diff --git 
a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index a41e5b16a85..75842286ba8 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -267,7 +267,7 @@ src_register( struct st_translate *t, /** * Map mesa texture target to TGSI texture target. */ -static unsigned +unsigned translate_texture_target( GLuint textarget, GLboolean shadow ) { @@ -511,7 +511,7 @@ static void emit_ddy( struct st_translate *t, -static unsigned +unsigned translate_opcode( unsigned op ) { switch( op ) { diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index 0615e52ef62..0dbdf5f6159 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -64,6 +64,12 @@ st_translate_mesa_program( void st_free_tokens(const struct tgsi_token *tokens); +unsigned +translate_opcode(unsigned op); + +unsigned +translate_texture_target(GLuint textarget, GLboolean shadow); + #if defined __cplusplus } /* extern "C" */ diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 7a6d33d3fea..dd618424d66 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -174,8 +174,8 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp) * \param tokensOut destination for TGSI tokens * \return pointer to cached pipe_shader object. 
*/ -static void -st_prepare_vertex_program(struct st_context *st, +void +st_prepare_vertex_program(struct gl_context *ctx, struct st_vertex_program *stvp) { GLuint attr; @@ -184,7 +184,7 @@ st_prepare_vertex_program(struct st_context *st, stvp->num_outputs = 0; if (stvp->Base.IsPositionInvariant) - _mesa_insert_mvp_code(st->ctx, &stvp->Base); + _mesa_insert_mvp_code(ctx, &stvp->Base); assert(stvp->Base.Base.NumInstructions > 1); @@ -292,7 +292,7 @@ st_translate_vertex_program(struct st_context *st, enum pipe_error error; unsigned num_outputs; - st_prepare_vertex_program( st, stvp ); + st_prepare_vertex_program(st->ctx, stvp); _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); @@ -318,22 +318,41 @@ st_translate_vertex_program(struct st_context *st, debug_printf("\n"); } - error = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_VERTEX, - ureg, - &stvp->Base.Base, - /* inputs */ - vpv->num_inputs, - stvp->input_to_index, - NULL, /* input semantic name */ - NULL, /* input semantic index */ - NULL, - /* outputs */ - num_outputs, - stvp->result_to_output, - stvp->output_semantic_name, - stvp->output_semantic_index, - key->passthrough_edgeflags ); + if (stvp->glsl_to_tgsi) + error = st_translate_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + ureg, + stvp->glsl_to_tgsi, + &stvp->Base.Base, + /* inputs */ + stvp->num_inputs, + stvp->input_to_index, + NULL, /* input semantic name */ + NULL, /* input semantic index */ + NULL, /* interp mode */ + /* outputs */ + stvp->num_outputs, + stvp->result_to_output, + stvp->output_semantic_name, + stvp->output_semantic_index, + key->passthrough_edgeflags ); + else + error = st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + ureg, + &stvp->Base.Base, + /* inputs */ + vpv->num_inputs, + stvp->input_to_index, + NULL, /* input semantic name */ + NULL, /* input semantic index */ + NULL, + /* outputs */ + num_outputs, + stvp->result_to_output, + 
stvp->output_semantic_name, + stvp->output_semantic_index, + key->passthrough_edgeflags ); if (error) goto fail; @@ -393,6 +412,151 @@ st_get_vp_variant(struct st_context *st, return vpv; } +/** + * Translate Mesa fragment shader attributes to TGSI attributes. + * \return GL_TRUE if color output should be written to all render targets, + * GL_FALSE if not + */ +GLboolean +st_prepare_fragment_program(struct gl_context *ctx, + struct st_fragment_program *stfp) +{ + GLuint attr; + const GLbitfield inputsRead = stfp->Base.Base.InputsRead; + GLboolean write_all = GL_FALSE; + + /* + * Convert Mesa program inputs to TGSI input register semantics. + */ + for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { + if (inputsRead & (1 << attr)) { + const GLuint slot = stfp->num_inputs++; + + stfp->input_to_index[attr] = slot; + + switch (attr) { + case FRAG_ATTRIB_WPOS: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL0: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL1: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + stfp->input_semantic_index[slot] = 1; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_FOGC: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + case FRAG_ATTRIB_FACE: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_CONSTANT; + break; + /* In most cases, there is nothing special about these + * inputs, so adopt a convention to use the generic + * semantic name and the mesa FRAG_ATTRIB_ number as the + * index. 
+ * + * All that is required is that the vertex shader labels + * its own outputs similarly, and that the vertex shader + * generates at least every output required by the + * fragment shader plus fixed-function hardware (such as + * BFC). + * + * There is no requirement that semantic indexes start at + * zero or be restricted to a particular range -- nobody + * should be building tables based on semantic index. + */ + case FRAG_ATTRIB_PNTC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + case FRAG_ATTRIB_VAR0: + default: + /* Actually, let's try and zero-base this just for + * readability of the generated TGSI. + */ + assert(attr >= FRAG_ATTRIB_TEX0); + stfp->input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0); + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + if (attr == FRAG_ATTRIB_PNTC) + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + else + stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + } + } + else { + stfp->input_to_index[attr] = -1; + } + } + + /* + * Semantics and mapping for outputs + */ + { + uint numColors = 0; + GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; + + /* if z is written, emit that first */ + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_POSITION; + stfp->output_semantic_index[stfp->num_outputs] = 0; + stfp->result_to_output[FRAG_RESULT_DEPTH] = stfp->num_outputs; + stfp->num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); + } + + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { + stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_STENCIL; + stfp->output_semantic_index[stfp->num_outputs] = 0; + stfp->result_to_output[FRAG_RESULT_STENCIL] = stfp->num_outputs; + stfp->num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); + 
} + + /* handle remaining outputs (color) */ + for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { + if (outputsWritten & BITFIELD64_BIT(attr)) { + switch (attr) { + case FRAG_RESULT_DEPTH: + case FRAG_RESULT_STENCIL: + /* handled above */ + assert(0); + break; + case FRAG_RESULT_COLOR: + write_all = GL_TRUE; /* fallthrough */ + default: + assert(attr == FRAG_RESULT_COLOR || + (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); + stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_COLOR; + stfp->output_semantic_index[stfp->num_outputs] = numColors; + stfp->result_to_output[attr] = stfp->num_outputs; + numColors++; + break; + } + + stfp->num_outputs++; + } + } + } + + return write_all; +} + /** * Translate a Mesa fragment shader into a TGSI shader using extra info in @@ -445,155 +609,12 @@ st_translate_fragment_program(struct st_context *st, if (!stfp->tgsi.tokens) { /* need to translate Mesa instructions to TGSI now */ - GLuint outputMapping[FRAG_RESULT_MAX]; - GLuint inputMapping[FRAG_ATTRIB_MAX]; - GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ - GLuint attr; enum pipe_error error; - const GLbitfield inputsRead = stfp->Base.Base.InputsRead; struct ureg_program *ureg; - GLboolean write_all = GL_FALSE; - - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; - ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - uint fs_num_inputs = 0; - - ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; - ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - uint fs_num_outputs = 0; - - + GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp); + _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); - /* - * Convert Mesa program inputs to TGSI input register semantics. 
- */ - for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { - if (inputsRead & (1 << attr)) { - const GLuint slot = fs_num_inputs++; - - inputMapping[attr] = slot; - - switch (attr) { - case FRAG_ATTRIB_WPOS: - input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_COL0: - input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_COL1: - input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - input_semantic_index[slot] = 1; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_FOGC: - input_semantic_name[slot] = TGSI_SEMANTIC_FOG; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - case FRAG_ATTRIB_FACE: - input_semantic_name[slot] = TGSI_SEMANTIC_FACE; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; - break; - /* In most cases, there is nothing special about these - * inputs, so adopt a convention to use the generic - * semantic name and the mesa FRAG_ATTRIB_ number as the - * index. - * - * All that is required is that the vertex shader labels - * its own outputs similarly, and that the vertex shader - * generates at least every output required by the - * fragment shader plus fixed-function hardware (such as - * BFC). - * - * There is no requirement that semantic indexes start at - * zero or be restricted to a particular range -- nobody - * should be building tables based on semantic index. - */ - case FRAG_ATTRIB_PNTC: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - case FRAG_ATTRIB_VAR0: - default: - /* Actually, let's try and zero-base this just for - * readability of the generated TGSI. 
- */ - assert(attr >= FRAG_ATTRIB_TEX0); - input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0); - input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - if (attr == FRAG_ATTRIB_PNTC) - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - else - interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - } - } - else { - inputMapping[attr] = -1; - } - } - - /* - * Semantics and mapping for outputs - */ - { - uint numColors = 0; - GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; - - /* if z is written, emit that first */ - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION; - fs_output_semantic_index[fs_num_outputs] = 0; - outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs; - fs_num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); - } - - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL; - fs_output_semantic_index[fs_num_outputs] = 0; - outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs; - fs_num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); - } - - /* handle remaning outputs (color) */ - for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { - if (outputsWritten & BITFIELD64_BIT(attr)) { - switch (attr) { - case FRAG_RESULT_DEPTH: - case FRAG_RESULT_STENCIL: - /* handled above */ - assert(0); - break; - case FRAG_RESULT_COLOR: - write_all = GL_TRUE; /* fallthrough */ - default: - assert(attr == FRAG_RESULT_COLOR || - (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; - fs_output_semantic_index[fs_num_outputs] = numColors; - outputMapping[attr] = fs_num_outputs; - numColors++; - break; - } - - fs_num_outputs++; - } - } - } - ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) return NULL; @@ -606,21 +627,39 @@ st_translate_fragment_program(struct st_context *st, if (write_all == GL_TRUE) 
ureg_property_fs_color0_writes_all_cbufs(ureg, 1); - error = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_FRAGMENT, - ureg, - &stfp->Base.Base, - /* inputs */ - fs_num_inputs, - inputMapping, - input_semantic_name, - input_semantic_index, - interpMode, - /* outputs */ - fs_num_outputs, - outputMapping, - fs_output_semantic_name, - fs_output_semantic_index, FALSE ); + if (stfp->glsl_to_tgsi) + error = st_translate_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + stfp->glsl_to_tgsi, + &stfp->Base.Base, + /* inputs */ + stfp->num_inputs, + stfp->input_to_index, + stfp->input_semantic_name, + stfp->input_semantic_index, + stfp->interp_mode, + /* outputs */ + stfp->num_outputs, + stfp->result_to_output, + stfp->output_semantic_name, + stfp->output_semantic_index, FALSE ); + else + error = st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + &stfp->Base.Base, + /* inputs */ + stfp->num_inputs, + stfp->input_to_index, + stfp->input_semantic_name, + stfp->input_semantic_index, + stfp->interp_mode, + /* outputs */ + stfp->num_outputs, + stfp->result_to_output, + stfp->output_semantic_name, + stfp->output_semantic_index, FALSE ); stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL ); ureg_destroy( ureg ); diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index c4244df939e..67723de6d53 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -38,6 +38,7 @@ #include "program/program.h" #include "pipe/p_state.h" #include "st_context.h" +#include "st_glsl_to_tgsi.h" /** Fragment program variant key */ @@ -83,6 +84,22 @@ struct st_fp_variant struct st_fragment_program { struct gl_fragment_program Base; + struct glsl_to_tgsi_visitor* glsl_to_tgsi; + + /** maps a Mesa FRAG_ATTRIB_x to a packed TGSI input index */ + GLuint input_to_index[FRAG_ATTRIB_MAX]; + /** maps a TGSI input index back to a Mesa FRAG_ATTRIB_x */ + GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; + ubyte 
input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + GLuint num_inputs; + GLuint interp_mode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ + + /** Maps FRAG_RESULT_x to slot */ + GLuint result_to_output[FRAG_RESULT_MAX]; + ubyte output_semantic_name[FRAG_RESULT_MAX]; + ubyte output_semantic_index[FRAG_RESULT_MAX]; + GLuint num_outputs; struct pipe_shader_state tgsi; @@ -136,6 +153,7 @@ struct st_vp_variant struct st_vertex_program { struct gl_vertex_program Base; /**< The Mesa vertex program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */ GLuint input_to_index[VERT_ATTRIB_MAX]; @@ -184,6 +202,7 @@ struct st_gp_variant struct st_geometry_program { struct gl_geometry_program Base; /**< The Mesa geometry program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** map GP input back to VP output */ GLuint input_map[PIPE_MAX_SHADER_INPUTS]; @@ -276,6 +295,14 @@ st_get_gp_variant(struct st_context *st, const struct st_gp_variant_key *key); +extern void +st_prepare_vertex_program(struct gl_context *ctx, + struct st_vertex_program *stvp); + +extern GLboolean +st_prepare_fragment_program(struct gl_context *ctx, + struct st_fragment_program *stfp); + extern void st_release_vp_variants( struct st_context *st, From 1e5fd8e480b661c1ab748c2ded587650ea7f3d20 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 29 Apr 2011 19:00:24 -0500 Subject: [PATCH 141/600] mesa: fix segfault when no Mesa IR is generated --- src/mesa/program/program.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 78efca9f122..224446a2683 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -388,8 +388,9 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog) if (prog->String) free(prog->String); - _mesa_free_instructions(prog->Instructions, prog->NumInstructions); - + if 
(prog->Instructions) { + _mesa_free_instructions(prog->Instructions, prog->NumInstructions); + } if (prog->Parameters) { _mesa_free_parameter_list(prog->Parameters); } From 44867da3543ca54ef245695cef72a6e305451d93 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 29 Apr 2011 19:24:57 -0500 Subject: [PATCH 142/600] glsl_to_tgsi: stop generating Mesa IR Before, it was still generating unused Mesa IR as a remnant of ir_to_mesa, and depended on some of the information from it. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 290 ++------------------- src/mesa/state_tracker/st_program.c | 13 +- 2 files changed, 33 insertions(+), 270 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e1102503ee0..c562abc96c9 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -254,8 +254,9 @@ public: struct gl_shader_compiler_options *options; int next_temp; - + int num_address_regs; + int samplers_used; bool indirect_addr_temps; bool indirect_addr_consts; @@ -2310,170 +2311,23 @@ extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) delete v; } -static struct prog_src_register -mesa_st_src_reg_from_ir_st_src_reg(st_src_reg reg) -{ - struct prog_src_register mesa_reg; - - mesa_reg.File = reg.file; - assert(reg.index < (1 << INST_INDEX_BITS)); - mesa_reg.Index = reg.index; - mesa_reg.Swizzle = reg.swizzle; - mesa_reg.RelAddr = reg.reladdr != NULL; - mesa_reg.Negate = reg.negate; - mesa_reg.Abs = 0; - mesa_reg.HasIndex2 = GL_FALSE; - mesa_reg.RelAddr2 = 0; - mesa_reg.Index2 = 0; - - return mesa_reg; -} - -static void -set_branchtargets(glsl_to_tgsi_visitor *v, - struct prog_instruction *mesa_instructions, - int num_instructions) -{ - int if_count = 0, loop_count = 0; - int *if_stack, *loop_stack; - int if_stack_pos = 0, loop_stack_pos = 0; - int i, j; - - for (i = 0; i < num_instructions; i++) { - switch (mesa_instructions[i].Opcode) { - case 
OPCODE_IF: - if_count++; - break; - case OPCODE_BGNLOOP: - loop_count++; - break; - case OPCODE_BRK: - case OPCODE_CONT: - mesa_instructions[i].BranchTarget = -1; - break; - default: - break; - } - } - - if_stack = rzalloc_array(v->mem_ctx, int, if_count); - loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); - - for (i = 0; i < num_instructions; i++) { - switch (mesa_instructions[i].Opcode) { - case OPCODE_IF: - if_stack[if_stack_pos] = i; - if_stack_pos++; - break; - case OPCODE_ELSE: - mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; - if_stack[if_stack_pos - 1] = i; - break; - case OPCODE_ENDIF: - mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; - if_stack_pos--; - break; - case OPCODE_BGNLOOP: - loop_stack[loop_stack_pos] = i; - loop_stack_pos++; - break; - case OPCODE_ENDLOOP: - loop_stack_pos--; - /* Rewrite any breaks/conts at this nesting level (haven't - * already had a BranchTarget assigned) to point to the end - * of the loop. - */ - for (j = loop_stack[loop_stack_pos]; j < i; j++) { - if (mesa_instructions[j].Opcode == OPCODE_BRK || - mesa_instructions[j].Opcode == OPCODE_CONT) { - if (mesa_instructions[j].BranchTarget == -1) { - mesa_instructions[j].BranchTarget = i; - } - } - } - /* The loop ends point at each other. 
*/ - mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; - mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; - break; - case OPCODE_CAL: - foreach_iter(exec_list_iterator, iter, v->function_signatures) { - function_entry *entry = (function_entry *)iter.get(); - - if (entry->sig_id == mesa_instructions[i].BranchTarget) { - mesa_instructions[i].BranchTarget = entry->inst; - break; - } - } - break; - default: - break; - } - } -} - -static void -print_program(struct prog_instruction *mesa_instructions, - ir_instruction **mesa_instruction_annotation, - int num_instructions) -{ - /*ir_instruction *last_ir = NULL;*/ - int i; - int indent = 0; - - for (i = 0; i < num_instructions; i++) { - struct prog_instruction *mesa_inst = mesa_instructions + i; - - fprintf(stdout, "%3d: ", i); - -#if 0 -/* Disable this for now, since printing GLSL IR along with its corresponding - * Mesa IR makes the Mesa IR unreadable. */ - ir_instruction *ir = mesa_instruction_annotation[i]; - if (last_ir != ir && ir) { - int j; - - for (j = 0; j < indent; j++) { - fprintf(stdout, " "); - } - ir->print(); - printf("\n"); - last_ir = ir; - - fprintf(stdout, " "); /* line number spacing. */ - } -#endif - - indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, - PROG_PRINT_DEBUG, NULL); - } -} - /** * Count resources used by the given gpu program (number of texture * samplers, etc). 
*/ static void -count_resources(struct gl_program *prog) +count_resources(glsl_to_tgsi_visitor *v) { - unsigned int i; + v->samplers_used = 0; - prog->SamplersUsed = 0; + foreach_iter(exec_list_iterator, iter, v->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = &prog->Instructions[i]; - - if (_mesa_is_tex_instruction(inst->Opcode)) { - prog->SamplerTargets[inst->TexSrcUnit] = - (gl_texture_index)inst->TexSrcTarget; - prog->SamplersUsed |= 1 << inst->TexSrcUnit; - if (inst->TexShadow) { - prog->ShadowSamplers |= 1 << inst->TexSrcUnit; - } + if (_mesa_is_tex_instruction(inst->op)) { + v->samplers_used |= 1 << inst->sampler; } } - - _mesa_update_shader_textures_used(prog); } @@ -2487,34 +2341,35 @@ count_resources(struct gl_program *prog) static void check_resources(const struct gl_context *ctx, struct gl_shader_program *shader_program, - struct gl_program *prog) + glsl_to_tgsi_visitor *prog, + struct gl_program *proginfo) { - switch (prog->Target) { + switch (proginfo->Target) { case GL_VERTEX_PROGRAM_ARB: - if (_mesa_bitcount(prog->SamplersUsed) > + if (_mesa_bitcount(prog->samplers_used) > ctx->Const.MaxVertexTextureImageUnits) { fail_link(shader_program, "Too many vertex shader texture samplers"); } - if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { fail_link(shader_program, "Too many vertex shader constants"); } break; case MESA_GEOMETRY_PROGRAM: - if (_mesa_bitcount(prog->SamplersUsed) > + if (_mesa_bitcount(prog->samplers_used) > ctx->Const.MaxGeometryTextureImageUnits) { fail_link(shader_program, "Too many geometry shader texture samplers"); } - if (prog->Parameters->NumParameters > + if (proginfo->Parameters->NumParameters > MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { fail_link(shader_program, "Too many geometry shader constants"); } break; case GL_FRAGMENT_PROGRAM_ARB: - if 
(_mesa_bitcount(prog->SamplersUsed) > + if (_mesa_bitcount(prog->samplers_used) > ctx->Const.MaxTextureImageUnits) { fail_link(shader_program, "Too many fragment shader texture samplers"); } - if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { fail_link(shader_program, "Too many fragment shader constants"); } break; @@ -3767,8 +3622,6 @@ st_translate_program( t->pointSizeOutIndex = -1; t->prevInstWrotePointSize = GL_FALSE; - /*_mesa_print_program(program);*/ - /* * Declare input attributes. */ @@ -3952,8 +3805,7 @@ st_translate_program( /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { - // XXX: depends on SamplersUsed property generated by conversion to Mesa IR - if (proginfo->SamplersUsed & (1 << i)) { + if (program->samplers_used & (1 << i)) { t->samplers[i] = ureg_DECL_sampler( ureg, i ); } } @@ -4006,7 +3858,8 @@ out: /* ----------------------------- End TGSI code ------------------------------ */ /** - * Convert a shader's GLSL IR into both a Mesa gl_program and a TGSI shader. + * Convert a shader's GLSL IR into a Mesa gl_program, although without + * generating Mesa IR. 
*/ static struct gl_program * get_mesa_program(struct gl_context *ctx, @@ -4014,9 +3867,6 @@ get_mesa_program(struct gl_context *ctx, struct gl_shader *shader) { glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); - struct prog_instruction *mesa_instructions, *mesa_inst; - ir_instruction **mesa_instruction_annotation; - int i; struct gl_program *prog; GLenum target; const char *target_string; @@ -4110,90 +3960,6 @@ get_mesa_program(struct gl_context *ctx, v->merge_registers(); v->renumber_registers(); - prog->NumTemporaries = v->next_temp; - - int num_instructions = 0; - foreach_iter(exec_list_iterator, iter, v->instructions) { - num_instructions++; - } - - mesa_instructions = - (struct prog_instruction *)calloc(num_instructions, - sizeof(*mesa_instructions)); - mesa_instruction_annotation = ralloc_array(v->mem_ctx, ir_instruction *, - num_instructions); - - /* Convert glsl_to_tgsi_instructions into Mesa IR prog_instructions. - * TODO: remove - */ - mesa_inst = mesa_instructions; - i = 0; - foreach_iter(exec_list_iterator, iter, v->instructions) { - const glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - - mesa_inst->Opcode = inst->op; - mesa_inst->CondUpdate = inst->cond_update; - if (inst->saturate) - mesa_inst->SaturateMode = SATURATE_ZERO_ONE; - mesa_inst->DstReg.File = inst->dst.file; - mesa_inst->DstReg.Index = inst->dst.index; - mesa_inst->DstReg.CondMask = inst->dst.cond_mask; - mesa_inst->DstReg.WriteMask = inst->dst.writemask; - mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; - mesa_inst->SrcReg[0] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[0]); - mesa_inst->SrcReg[1] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[1]); - mesa_inst->SrcReg[2] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[2]); - mesa_inst->TexSrcUnit = inst->sampler; - mesa_inst->TexSrcTarget = inst->tex_target; - mesa_inst->TexShadow = inst->tex_shadow; - mesa_instruction_annotation[i] = inst->ir; - - /* Set IndirectRegisterFiles. 
*/ - if (mesa_inst->DstReg.RelAddr) - prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; - - /* Update program's bitmask of indirectly accessed register files */ - for (unsigned src = 0; src < 3; src++) - if (mesa_inst->SrcReg[src].RelAddr) - prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; - - if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { - fail_link(shader_program, "Couldn't flatten if statement\n"); - } - - switch (mesa_inst->Opcode) { - case OPCODE_BGNSUB: - inst->function->inst = i; - mesa_inst->Comment = strdup(inst->function->sig->function_name()); - break; - case OPCODE_ENDSUB: - mesa_inst->Comment = strdup(inst->function->sig->function_name()); - break; - case OPCODE_CAL: - mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */ - break; - case OPCODE_ARL: - prog->NumAddressRegs = 1; - break; - default: - break; - } - - mesa_inst++; - i++; - - if (!shader_program->LinkStatus) - break; - } - - if (!shader_program->LinkStatus) { - free(mesa_instructions); - _mesa_reference_program(ctx, &shader->Program, NULL); - return NULL; - } - - set_branchtargets(v, mesa_instructions, num_instructions); - if (ctx->Shader.Flags & GLSL_DUMP) { printf("\n"); printf("GLSL IR for linked %s program %d:\n", target_string, @@ -4201,25 +3967,17 @@ get_mesa_program(struct gl_context *ctx, _mesa_print_ir(shader->ir, NULL); printf("\n"); printf("\n"); - printf("Mesa IR for linked %s program %d:\n", target_string, - shader_program->Name); - print_program(mesa_instructions, mesa_instruction_annotation, - num_instructions); } - prog->Instructions = mesa_instructions; - prog->NumInstructions = num_instructions; + prog->Instructions = NULL; + prog->NumInstructions = 0; do_set_program_inouts(shader->ir, prog); - count_resources(prog); + count_resources(v); - check_resources(ctx, shader_program, prog); + check_resources(ctx, shader_program, v, prog); _mesa_reference_program(ctx, &shader->Program, prog); - - if ((ctx->Shader.Flags & 
GLSL_NO_OPT) == 0) { - _mesa_optimize_program(ctx, prog); - } struct st_vertex_program *stvp; struct st_fragment_program *stfp; diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index dd618424d66..6d395128295 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -186,7 +186,8 @@ st_prepare_vertex_program(struct gl_context *ctx, if (stvp->Base.IsPositionInvariant) _mesa_insert_mvp_code(ctx, &stvp->Base); - assert(stvp->Base.Base.NumInstructions > 1); + if (!stvp->glsl_to_tgsi) + assert(stvp->Base.Base.NumInstructions > 1); /* * Determine number of inputs, the mappings between VERT_ATTRIB_x @@ -294,8 +295,11 @@ st_translate_vertex_program(struct st_context *st, st_prepare_vertex_program(st->ctx, stvp); - _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); - _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); + if (!stvp->glsl_to_tgsi) + { + _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); + _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); + } ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); if (ureg == NULL) { @@ -613,7 +617,8 @@ st_translate_fragment_program(struct st_context *st, struct ureg_program *ureg; GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp); - _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); + if (!stfp->glsl_to_tgsi) + _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) From c341d3cfd0ddbabf6274212b7f0da1a25854a673 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 13:03:33 -0500 Subject: [PATCH 143/600] glsl_to_tgsi: remove reads to output registers Fixes a regression in 0 A.D. introduced by 809a11c77073e999fd47. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 91 ++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index c562abc96c9..5ea03b4424e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -100,6 +100,15 @@ public: this->reladdr = NULL; } + st_src_reg(gl_register_file file, int index) + { + this->file = file; + this->index = index; + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = NULL; + } + st_src_reg() { this->file = PROGRAM_UNDEFINED; @@ -346,6 +355,8 @@ public: bool process_move_condition(ir_rvalue *ir); + void remove_output_reads(gl_register_file type); + void rename_temp_register(int index, int new_index); int get_first_temp_read(int index); int get_first_temp_write(int index); @@ -2595,6 +2606,81 @@ set_uniform_initializers(struct gl_context *ctx, ralloc_free(mem_ctx); } +/* + * Scan/rewrite program to remove reads of custom (output) registers. + * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING + * (for vertex shaders). + * In GLSL shaders, varying vars can be read and written. + * On some hardware, trying to read an output register causes trouble. + * So, rewrite the program to use a temporary register in this case. + * + * Based on _mesa_remove_output_reads from programopt.c. 
+ */ +void +glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) +{ + GLuint i; + GLint outputMap[VERT_RESULT_MAX]; + GLuint numVaryingReads = 0; + GLboolean usedTemps[MAX_PROGRAM_TEMPS]; + GLuint firstTemp = 0; + + _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, + usedTemps, MAX_PROGRAM_TEMPS); + + assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); + assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); + + for (i = 0; i < VERT_RESULT_MAX; i++) + outputMap[i] = -1; + + /* look for instructions which read from varying vars */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + const GLuint numSrc = _mesa_num_inst_src_regs(inst->op); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->src[j].file == type) { + /* replace the read with a temp reg */ + const GLuint var = inst->src[j].index; + if (outputMap[var] == -1) { + numVaryingReads++; + outputMap[var] = _mesa_find_free_register(usedTemps, + MAX_PROGRAM_TEMPS, + firstTemp); + firstTemp = outputMap[var] + 1; + } + inst->src[j].file = PROGRAM_TEMPORARY; + inst->src[j].index = outputMap[var]; + } + } + } + + if (numVaryingReads == 0) + return; /* nothing to be done */ + + /* look for instructions which write to the varying vars identified above */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { + /* change inst to write to the temp reg, instead of the varying */ + inst->dst.file = PROGRAM_TEMPORARY; + inst->dst.index = outputMap[inst->dst.index]; + } + } + + /* insert new MOV instructions at the end */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + if (outputMap[i] >= 0) { + /* MOV VAR[i], TEMP[tmp]; */ + st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]); + st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW); + dst.index = 
i; + this->emit(NULL, OPCODE_MOV, dst, src); + } + } +} + /* Replaces all references to a temporary register index with another index. */ void glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) @@ -3954,6 +4040,11 @@ get_mesa_program(struct gl_context *ctx, } #endif + /* Remove reads to output registers, and to varyings in vertex shaders. */ + v->remove_output_reads(PROGRAM_OUTPUT); + if (target == GL_VERTEX_PROGRAM_ARB) + v->remove_output_reads(PROGRAM_VARYING); + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->copy_propagate(); v->eliminate_dead_code(); From 556bd82ce1227a568d69dfa0c22841986267d39f Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 13:44:32 -0500 Subject: [PATCH 144/600] glsl_to_tgsi: remove a bad assertion It was triggered by Alien Arena. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5ea03b4424e..aa63539e5e8 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3077,11 +3077,11 @@ glsl_to_tgsi_visitor::merge_registers(void) /* Start looking for registers with non-overlapping usages that can be * merged together. */ - for (i=0; i < this->next_temp - 1; i++) { + for (i=0; i < this->next_temp; i++) { /* Don't touch unused registers. */ if (last_reads[i] < 0 || first_writes[i] < 0) continue; - for (j=i+1; j < this->next_temp; j++) { + for (j=0; j < this->next_temp; j++) { /* Don't touch unused registers. */ if (last_reads[j] < 0 || first_writes[j] < 0) continue; @@ -3089,8 +3089,9 @@ glsl_to_tgsi_visitor::merge_registers(void) * in the same instruction as the last read from i. Note that the * register at index i will always be used earlier or at the same time * as the register at index j. 
*/ - assert(first_writes[i] <= first_writes[j]); - if (last_reads[i] <= first_writes[j]) { + if (first_writes[i] <= first_writes[j] && + last_reads[i] <= first_writes[j]) + { rename_temp_register(j, i); /* Replace all references to j with i.*/ /* Update the first_writes and last_reads arrays with the new From 5768ed6429937940bd48f5de4f8383273952880a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 21:17:38 -0500 Subject: [PATCH 145/600] glsl_to_tgsi: define the sampler objects used Fixes the Nexuiz title screen and the water in 0 A.D. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index aa63539e5e8..5f3f0ba295a 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2328,7 +2328,7 @@ extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) * samplers, etc). 
*/ static void -count_resources(glsl_to_tgsi_visitor *v) +count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; @@ -2337,8 +2337,17 @@ count_resources(glsl_to_tgsi_visitor *v) if (_mesa_is_tex_instruction(inst->op)) { v->samplers_used |= 1 << inst->sampler; + + prog->SamplerTargets[inst->sampler] = + (gl_texture_index)inst->tex_target; + if (inst->tex_shadow) { + prog->ShadowSamplers |= 1 << inst->sampler; + } } } + + prog->SamplersUsed = v->samplers_used; + _mesa_update_shader_textures_used(prog); } @@ -4065,7 +4074,7 @@ get_mesa_program(struct gl_context *ctx, prog->NumInstructions = 0; do_set_program_inouts(shader->ir, prog); - count_resources(v); + count_resources(v, prog); check_resources(ctx, shader_program, v, prog); From a6705aa5ca151278ed1e596b68a327afd1405b9e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 23:17:11 -0500 Subject: [PATCH 146/600] glsl_to_tgsi: lower noise opcodes when converting from GLSL IR, not when generating TGSI --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5f3f0ba295a..08c6a7b2dd3 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1275,12 +1275,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_noise: { - const enum prog_opcode opcode = - prog_opcode(OPCODE_NOISE1 - + (ir->operands[0]->type->vector_elements) - 1); - assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4)); - - emit(ir, opcode, result_dst, op[0]); + /* At some point, a motivated person could add a better + * implementation of noise. Currently not even the nvidia + * binary drivers do anything more than this. In any case, the + * place to do this is in the GL state tracker, not the poor + * driver. 
+ */ + emit(ir, OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); break; } @@ -3484,13 +3485,7 @@ compile_tgsi_instruction(struct st_translate *t, case OPCODE_NOISE2: case OPCODE_NOISE3: case OPCODE_NOISE4: - /* At some point, a motivated person could add a better - * implementation of noise. Currently not even the nvidia - * binary drivers do anything more than this. In any case, the - * place to do this is in the GL state tracker, not the poor - * driver. - */ - ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); + assert(!"OPCODE_NOISE should have been lowered\n"); break; case OPCODE_DDY: From 3b0858f1aed83e2d90449f042d625c86ac7b93ed Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 1 May 2011 11:55:03 -0500 Subject: [PATCH 147/600] glsl_to_tgsi: support DDY (ir_unop_dFdy) --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 08c6a7b2dd3..eed9bb0819e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1271,6 +1271,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, OPCODE_DDX, result_dst, op[0]); break; case ir_unop_dFdy: + op[0].negate = ~op[0].negate; emit(ir, OPCODE_DDY, result_dst, op[0]); break; @@ -3487,12 +3488,6 @@ compile_tgsi_instruction(struct st_translate *t, case OPCODE_NOISE4: assert(!"OPCODE_NOISE should have been lowered\n"); break; - - case OPCODE_DDY: - // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c - assert(!"OPCODE_DDY"); - //emit_ddy( t, dst[0], &inst->src[0] ); - break; default: ureg_insn( ureg, From 56dc2c176c3ef0d4d5abea54ff4035b062262286 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 1 May 2011 21:49:21 -0500 Subject: [PATCH 148/600] glsl_to_tgsi: use TGSI opcodes when converting from GLSL IR Before, the translator used Mesa IR opcodes (a holdover from ir_to_mesa) and converted them to TGSI opcodes 
during TGSI emission. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 550 ++++++++------------- 1 file changed, 217 insertions(+), 333 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index eed9bb0819e..4cb2f377e98 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -27,7 +27,7 @@ /** * \file glsl_to_tgsi.cpp * - * Translate GLSL IR to Mesa's gl_program representation and to TGSI. + * Translate GLSL IR to TGSI. */ #include @@ -63,11 +63,12 @@ extern "C" { #include "pipe/p_state.h" #include "util/u_math.h" #include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" #include "st_context.h" #include "st_program.h" #include "st_glsl_to_tgsi.h" #include "st_mesa_to_tgsi.h" +} #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ (1 << PROGRAM_ENV_PARAM) | \ @@ -75,7 +76,6 @@ extern "C" { (1 << PROGRAM_NAMED_PARAM) | \ (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) -} class st_src_reg; class st_dst_reg; @@ -83,8 +83,7 @@ class st_dst_reg; static int swizzle_for_size(int size); /** - * This struct is a corresponding struct to Mesa prog_src_register, with - * wider fields. + * This struct is a corresponding struct to TGSI ureg_src. 
*/ class st_src_reg { public: @@ -190,7 +189,7 @@ public: return node; } - enum prog_opcode op; + unsigned op; st_dst_reg dst; st_src_reg src[3]; /** Pointer to the ir source this tree came from for debugging */ @@ -201,7 +200,7 @@ public: int tex_target; /**< One of TEXTURE_*_INDEX */ GLboolean tex_shadow; - class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */ + class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ }; class variable_storage : public exec_node { @@ -317,15 +316,15 @@ public: /** List of glsl_to_tgsi_instruction */ exec_list instructions; - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op); + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2); @@ -338,13 +337,13 @@ public: st_src_reg src1, unsigned elements); - void emit_scalar(ir_instruction *ir, enum prog_opcode op, + void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0); - void emit_scalar(ir_instruction *ir, enum prog_opcode op, + void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); - void emit_scs(ir_instruction *ir, enum prog_opcode op, + void emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src); GLboolean try_emit_mad(ir_expression *ir, @@ -405,8 +404,29 @@ swizzle_for_size(int size) return size_swizzles[size - 1]; } +static bool 
+is_tex_instruction(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->is_tex; +} + +static unsigned +num_inst_dst_regs(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->num_dst; +} + +static unsigned +num_inst_src_regs(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->is_tex ? info->num_src - 1 : info->num_src; +} + glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2) { @@ -427,7 +447,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, reladdr_to_temp(ir, &src0, &num_reladdr); if (dst.reladdr) { - emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); + emit(ir, TGSI_OPCODE_ARL, address_reg, *dst.reladdr); num_reladdr--; } assert(num_reladdr == 0); @@ -441,7 +461,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, inst->function = NULL; - if (op == OPCODE_ARL) + if (op == TGSI_OPCODE_ARL) this->num_address_regs = 1; /* Update indirect addressing status used by TGSI */ @@ -491,14 +511,14 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1) { return emit(ir, op, dst, src0, src1, undef_src); } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0) { assert(dst.writemask != 0); @@ -506,7 +526,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op) 
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) { return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } @@ -516,30 +536,30 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, st_dst_reg dst, st_src_reg src0, st_src_reg src1, unsigned elements) { - static const gl_inst_opcode dot_opcodes[] = { - OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 + static const unsigned dot_opcodes[] = { + TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 }; emit(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** - * Emits Mesa scalar opcodes to produce unique answers across channels. + * Emits TGSI scalar opcodes to produce unique answers across channels. * - * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X + * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X * channel determines the result across all channels. So to do a vec4 * of this operation, we want to emit a scalar per source channel used * to produce dest channels. */ void -glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg orig_src0, st_src_reg orig_src1) { int i, j; int done_mask = ~dst.writemask; - /* Mesa RCP is a scalar operation splatting results to all channels, + /* TGSI RCP is a scalar operation splatting results to all channels, * like ARB_fp/vp. So emit as many RCPs as necessary to cover our * dst channels. 
*/ @@ -577,7 +597,7 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, } void -glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0) { st_src_reg undef = undef_src; @@ -588,21 +608,21 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, } /** - * Emit an OPCODE_SCS instruction + * Emit an TGSI_OPCODE_SCS instruction * - * The \c SCS opcode functions a bit differently than the other Mesa (or - * ARB_fragment_program) opcodes. Instead of splatting its result across all - * four components of the destination, it writes one value to the \c x - * component and another value to the \c y component. + * The \c SCS opcode functions a bit differently than the other TGSI opcodes. + * Instead of splatting its result across all four components of the + * destination, it writes one value to the \c x component and another value to + * the \c y component. * * \param ir IR instruction being processed - * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which - * value is desired. + * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending + * on which value is desired. * \param dst Destination register * \param src Source register */ void -glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src) { @@ -613,12 +633,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, return; } - const unsigned component = (op == OPCODE_SIN) ? 0 : 1; + const unsigned component = (op == TGSI_OPCODE_SIN) ? 
0 : 1; const unsigned scs_mask = (1U << component); int done_mask = ~dst.writemask; st_src_reg tmp; - assert(op == OPCODE_SIN || op == OPCODE_COS); + assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); /* If there are compnents in the destination that differ from the component * that will be written by the SCS instrution, we'll need a temporary. @@ -661,7 +681,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, /* Emit the SCS instruction. */ - inst = emit(ir, OPCODE_SCS, tmp_dst, src0); + inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); inst->dst.writemask = scs_mask; /* Move the result of the SCS instruction to the desired location in @@ -669,12 +689,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, */ tmp.swizzle = MAKE_SWIZZLE4(component, component, component, component); - inst = emit(ir, OPCODE_SCS, dst, tmp); + inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); inst->dst.writemask = this_mask; } else { /* Emit the SCS instruction to write directly to the destination. */ - glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0); + glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); inst->dst.writemask = scs_mask; } @@ -870,7 +890,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) } else { st_src_reg src(PROGRAM_STATE_VAR, index, NULL); src.swizzle = slots[i].swizzle; - emit(ir, OPCODE_MOV, dst, src); + emit(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. 
*/ dst.index++; } @@ -903,7 +923,7 @@ glsl_to_tgsi_visitor::visit(ir_loop *ir) delete a; } - emit(NULL, OPCODE_BGNLOOP); + emit(NULL, TGSI_OPCODE_BGNLOOP); if (ir->to) { ir_expression *e = @@ -936,7 +956,7 @@ glsl_to_tgsi_visitor::visit(ir_loop *ir) delete e; } - emit(NULL, OPCODE_ENDLOOP); + emit(NULL, TGSI_OPCODE_ENDLOOP); } void @@ -944,10 +964,10 @@ glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) { switch (ir->mode) { case ir_loop_jump::jump_break: - emit(NULL, OPCODE_BRK); + emit(NULL, TGSI_OPCODE_BRK); break; case ir_loop_jump::jump_continue: - emit(NULL, OPCODE_CONT); + emit(NULL, TGSI_OPCODE_CONT); break; } } @@ -1000,7 +1020,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) c = this->result; this->result = get_temp(ir->type); - emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c); + emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c); return true; } @@ -1023,7 +1043,7 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) this->result = get_temp(ir->type); glsl_to_tgsi_instruction *inst; - inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src); + inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src); inst->saturate = true; return true; @@ -1036,135 +1056,18 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, if (!reg->reladdr) return; - emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); + emit(ir, TGSI_OPCODE_ARL, address_reg, *reg->reladdr); if (*num_reladdr != 1) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg); + emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); *reg = temp; } (*num_reladdr)--; } -void -glsl_to_tgsi_visitor::emit_swz(ir_expression *ir) -{ - /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. - * This means that each of the operands is either an immediate value of -1, - * 0, or 1, or is a component from one source register (possibly with - * negation). 
- */ - uint8_t components[4] = { 0 }; - bool negate[4] = { false }; - ir_variable *var = NULL; - - for (unsigned i = 0; i < ir->type->vector_elements; i++) { - ir_rvalue *op = ir->operands[i]; - - assert(op->type->is_scalar()); - - while (op != NULL) { - switch (op->ir_type) { - case ir_type_constant: { - - assert(op->type->is_scalar()); - - const ir_constant *const c = op->as_constant(); - if (c->is_one()) { - components[i] = SWIZZLE_ONE; - } else if (c->is_zero()) { - components[i] = SWIZZLE_ZERO; - } else if (c->is_negative_one()) { - components[i] = SWIZZLE_ONE; - negate[i] = true; - } else { - assert(!"SWZ constant must be 0.0 or 1.0."); - } - - op = NULL; - break; - } - - case ir_type_dereference_variable: { - ir_dereference_variable *const deref = - (ir_dereference_variable *) op; - - assert((var == NULL) || (deref->var == var)); - components[i] = SWIZZLE_X; - var = deref->var; - op = NULL; - break; - } - - case ir_type_expression: { - ir_expression *const expr = (ir_expression *) op; - - assert(expr->operation == ir_unop_neg); - negate[i] = true; - - op = expr->operands[0]; - break; - } - - case ir_type_swizzle: { - ir_swizzle *const swiz = (ir_swizzle *) op; - - components[i] = swiz->mask.x; - op = swiz->val; - break; - } - - default: - assert(!"Should not get here."); - return; - } - } - } - - assert(var != NULL); - - ir_dereference_variable *const deref = - new(mem_ctx) ir_dereference_variable(var); - - this->result.file = PROGRAM_UNDEFINED; - deref->accept(this); - if (this->result.file == PROGRAM_UNDEFINED) { - ir_print_visitor v; - printf("Failed to get tree for expression operand:\n"); - deref->accept(&v); - exit(1); - } - - st_src_reg src; - - src = this->result; - src.swizzle = MAKE_SWIZZLE4(components[0], - components[1], - components[2], - components[3]); - src.negate = ((unsigned(negate[0]) << 0) - | (unsigned(negate[1]) << 1) - | (unsigned(negate[2]) << 2) - | (unsigned(negate[3]) << 3)); - - /* Storage for our result. 
Ideally for an assignment we'd be using the - * actual storage for the result here, instead. - */ - const st_src_reg result_src = get_temp(ir->type); - st_dst_reg result_dst = st_dst_reg(result_src); - - /* Limit writes to the channels that will be used by result_src later. - * This does limit this temp's use as a temporary for multi-instruction - * sequences. - */ - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - - emit(ir, OPCODE_SWZ, result_dst, src); - this->result = result_src; -} - void glsl_to_tgsi_visitor::visit(ir_expression *ir) { @@ -1173,7 +1076,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_src_reg result_src; st_dst_reg result_dst; - /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) + /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) */ if (ir->operation == ir_binop_add) { if (try_emit_mad(ir, 1)) @@ -1184,10 +1087,8 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (try_emit_sat(ir)) return; - if (ir->operation == ir_quadop_vector) { - this->emit_swz(ir); - return; - } + if (ir->operation == ir_quadop_vector) + assert(!"ir_quadop_vector should have been lowered"); for (operand = 0; operand < ir->get_num_operands(); operand++) { this->result.file = PROGRAM_UNDEFINED; @@ -1228,51 +1129,51 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_neg: op[0].negate = ~op[0].negate; result_src = op[0]; break; case ir_unop_abs: - emit(ir, OPCODE_ABS, result_dst, op[0]); + emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; case ir_unop_sign: - emit(ir, OPCODE_SSG, result_dst, op[0]); + emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); break; case ir_unop_rcp: - emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); break; case ir_unop_exp2: - 
emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); break; case ir_unop_exp: case ir_unop_log: assert(!"not reached: should be handled by ir_explog_to_explog2"); break; case ir_unop_log2: - emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); break; case ir_unop_sin: - emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); break; case ir_unop_cos: - emit_scalar(ir, OPCODE_COS, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; case ir_unop_sin_reduced: - emit_scs(ir, OPCODE_SIN, result_dst, op[0]); + emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); break; case ir_unop_cos_reduced: - emit_scs(ir, OPCODE_COS, result_dst, op[0]); + emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; case ir_unop_dFdx: - emit(ir, OPCODE_DDX, result_dst, op[0]); + emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); break; case ir_unop_dFdy: op[0].negate = ~op[0].negate; - emit(ir, OPCODE_DDY, result_dst, op[0]); + emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); break; case ir_unop_noise: { @@ -1282,19 +1183,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * place to do this is in the GL state tracker, not the poor * driver. 
*/ - emit(ir, OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); + emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); break; } case ir_binop_add: - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_sub: - emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); break; case ir_binop_mul: - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: assert(!"not reached: should be handled by ir_div_to_mul_rcp"); @@ -1303,33 +1204,33 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_binop_less: - emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); break; case ir_binop_greater: - emit(ir, OPCODE_SGT, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); break; case ir_binop_lequal: - emit(ir, OPCODE_SLE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]); break; case ir_binop_gequal: - emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); break; case ir_binop_equal: - emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); break; case ir_binop_nequal: - emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_all_equal: /* "==" operator producing a scalar boolean. 
*/ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); } else { - emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); } break; case ir_binop_any_nequal: @@ -1337,11 +1238,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); } else { - emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); } break; @@ -1349,22 +1250,22 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) assert(ir->operands[0]->type->is_vector()); emit_dp(ir, result_dst, op[0], op[0], ir->operands[0]->type->vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); break; case ir_binop_logic_xor: - emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_logic_or: /* This could be a saturated add and skip the SNE. 
*/ - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); break; case ir_binop_logic_and: /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_dot: @@ -1376,15 +1277,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_sqrt: /* sqrt(x) = x * rsq(x). */ - emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); - emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); + emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); /* For incoming channels <= 0, set the result to 0. */ op[0].negate = ~op[0].negate; - emit(ir, OPCODE_CMP, result_dst, + emit(ir, TGSI_OPCODE_CMP, result_dst, op[0], result_src, st_src_reg_for_float(0.0)); break; case ir_unop_rsq: - emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); break; case ir_unop_i2f: case ir_unop_b2f: @@ -1393,36 +1294,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) result_src = op[0]; break; case ir_unop_f2i: - emit(ir, OPCODE_TRUNC, result_dst, op[0]); + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_f2b: case ir_unop_i2b: - emit(ir, OPCODE_SNE, result_dst, + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_trunc: - emit(ir, OPCODE_TRUNC, result_dst, op[0]); + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_ceil: op[0].negate = ~op[0].negate; - emit(ir, OPCODE_FLR, result_dst, op[0]); + emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); result_src.negate = ~result_src.negate; break; case ir_unop_floor: - emit(ir, OPCODE_FLR, result_dst, op[0]); + emit(ir, TGSI_OPCODE_FLR, 
result_dst, op[0]); break; case ir_unop_fract: - emit(ir, OPCODE_FRC, result_dst, op[0]); + emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); break; case ir_binop_min: - emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); break; case ir_binop_max: - emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); break; case ir_binop_pow: - emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); + emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); break; case ir_unop_bit_not: @@ -1586,7 +1487,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) } else { index_reg = get_temp(glsl_type::float_type); - emit(ir, OPCODE_MUL, st_dst_reg(index_reg), + emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), this->result, st_src_reg_for_float(element_size)); } @@ -1728,9 +1629,9 @@ glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) src_ir->accept(this); - /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the + /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the * condition we produced is 0.0 or 1.0. By flipping the sign, we can - * choose which value OPCODE_CMP produces without an extra instruction + * choose which value TGSI_OPCODE_CMP produces without an extra instruction * computing the condition. 
*/ if (negate) @@ -1803,9 +1704,9 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) for (i = 0; i < type_size(ir->lhs->type); i++) { if (switch_order) { - emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r); + emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r); } else { - emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l)); + emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l)); } l.index++; @@ -1813,7 +1714,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } } else { for (i = 0; i < type_size(ir->lhs->type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -1849,7 +1750,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src = this->result; for (i = 0; i < (unsigned int)size; i++) { - emit(ir, OPCODE_MOV, temp, src); + emit(ir, TGSI_OPCODE_MOV, temp, src); src.index++; temp.index++; @@ -1870,7 +1771,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) ir->array_elements[i]->accept(this); src = this->result; for (int j = 0; j < size; j++) { - emit(ir, OPCODE_MOV, temp, src); + emit(ir, TGSI_OPCODE_MOV, temp, src); src.index++; temp.index++; @@ -1893,7 +1794,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values, ir->type->vector_elements, &src.swizzle); - emit(ir, OPCODE_MOV, mat_column, src); + emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; } @@ -2005,7 +1906,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) l.cond_mask = COND_TR; for (i = 0; i < type_size(param->type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -2016,7 +1917,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) assert(!sig_iter.has_next()); /* Emit call instruction */ - call_inst = emit(ir, OPCODE_CAL); + call_inst = emit(ir, TGSI_OPCODE_CAL); call_inst->function = entry; /* Process out parameters. 
*/ @@ -2041,7 +1942,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) st_dst_reg l = st_dst_reg(this->result); for (i = 0; i < type_size(param->type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -2061,7 +1962,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) st_src_reg result_src, coord, lod_info, projector, dx, dy; st_dst_reg result_dst, coord_dst; glsl_to_tgsi_instruction *inst = NULL; - prog_opcode opcode = OPCODE_NOP; + unsigned opcode = TGSI_OPCODE_NOP; ir->coordinate->accept(this); @@ -2072,7 +1973,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) */ coord = get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); - emit(ir, OPCODE_MOV, coord_dst, this->result); + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); if (ir->projector) { ir->projector->accept(this); @@ -2087,20 +1988,20 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) switch (ir->op) { case ir_tex: - opcode = OPCODE_TEX; + opcode = TGSI_OPCODE_TEX; break; case ir_txb: - opcode = OPCODE_TXB; + opcode = TGSI_OPCODE_TXB; ir->lod_info.bias->accept(this); lod_info = this->result; break; case ir_txl: - opcode = OPCODE_TXL; + opcode = TGSI_OPCODE_TXL; ir->lod_info.lod->accept(this); lod_info = this->result; break; case ir_txd: - opcode = OPCODE_TXD; + opcode = TGSI_OPCODE_TXD; ir->lod_info.grad.dPdx->accept(this); dx = this->result; ir->lod_info.grad.dPdy->accept(this); @@ -2112,25 +2013,25 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } if (ir->projector) { - if (opcode == OPCODE_TEX) { + if (opcode == TGSI_OPCODE_TEX) { /* Slot the projector in as the last component of the coord. 
*/ coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_MOV, coord_dst, projector); + emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); coord_dst.writemask = WRITEMASK_XYZW; - opcode = OPCODE_TXP; + opcode = TGSI_OPCODE_TXP; } else { st_src_reg coord_w = coord; coord_w.swizzle = SWIZZLE_WWWW; /* For the other TEX opcodes there's no projective version - * since the last slot is taken up by lod info. Do the + * since the last slot is taken up by LOD info. Do the * projective divide now. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_RCP, coord_dst, projector); + emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); /* In the case where we have to project the coordinates "by hand," - * the shadow comparitor value must also be projected. + * the shadow comparator value must also be projected. */ st_src_reg tmp_src = coord; if (ir->shadow_comparitor) { @@ -2143,42 +2044,42 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) st_dst_reg tmp_dst = st_dst_reg(tmp_src); tmp_dst.writemask = WRITEMASK_Z; - emit(ir, OPCODE_MOV, tmp_dst, this->result); + emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); tmp_dst.writemask = WRITEMASK_XY; - emit(ir, OPCODE_MOV, tmp_dst, coord); + emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); } coord_dst.writemask = WRITEMASK_XYZ; - emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); + emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); coord_dst.writemask = WRITEMASK_XYZW; coord.swizzle = SWIZZLE_XYZW; } } - /* If projection is done and the opcode is not OPCODE_TXP, then the shadow - * comparitor was put in the correct place (and projected) by the code, + /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow + * comparator was put in the correct place (and projected) by the code, * above, that handles by-hand projection. 
*/ - if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) { + if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { /* Slot the shadow value in as the second to last component of the * coord. */ ir->shadow_comparitor->accept(this); coord_dst.writemask = WRITEMASK_Z; - emit(ir, OPCODE_MOV, coord_dst, this->result); + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); coord_dst.writemask = WRITEMASK_XYZW; } - if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { - /* Mesa IR stores lod or lod bias in the last channel of the coords. */ + if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) { + /* TGSI stores LOD or LOD bias in the last channel of the coords. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_MOV, coord_dst, lod_info); + emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); coord_dst.writemask = WRITEMASK_XYZW; } - if (opcode == OPCODE_TXD) + if (opcode == TGSI_OPCODE_TXD) inst = emit(ir, opcode, result_dst, coord, dx, dy); else inst = emit(ir, opcode, result_dst, coord); @@ -2235,13 +2136,13 @@ glsl_to_tgsi_visitor::visit(ir_return *ir) l = st_dst_reg(current_function->return_reg); for (i = 0; i < type_size(current_function->sig->return_type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } } - emit(ir, OPCODE_RET); + emit(ir, TGSI_OPCODE_RET); } void @@ -2252,9 +2153,9 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) if (ir->condition) { ir->condition->accept(this); this->result.negate = ~this->result.negate; - emit(ir, OPCODE_KIL, undef_dst, this->result); + emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); } else { - emit(ir, OPCODE_KIL_NV); + emit(ir, TGSI_OPCODE_KILP); } fp->UsesKill = GL_TRUE; @@ -2280,14 +2181,14 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) */ if (cond_inst == prev_inst) { st_src_reg temp = get_temp(glsl_type::bool_type); - cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result); + cond_inst = emit(ir->condition, 
TGSI_OPCODE_MOV, st_dst_reg(temp), result); } cond_inst->cond_update = GL_TRUE; - if_inst = emit(ir->condition, OPCODE_IF); + if_inst = emit(ir->condition, TGSI_OPCODE_IF); if_inst->dst.cond_mask = COND_NE; } else { - if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); + if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); } this->instructions.push_tail(if_inst); @@ -2295,11 +2196,11 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) visit_exec_list(&ir->then_instructions, this); if (!ir->else_instructions.is_empty()) { - else_inst = emit(ir->condition, OPCODE_ELSE); + else_inst = emit(ir->condition, TGSI_OPCODE_ELSE); visit_exec_list(&ir->else_instructions, this); } - if_inst = emit(ir->condition, OPCODE_ENDIF); + if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); } glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() @@ -2337,7 +2238,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) foreach_iter(exec_list_iterator, iter, v->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - if (_mesa_is_tex_instruction(inst->op)) { + if (is_tex_instruction(inst->op)) { v->samplers_used |= 1 << inst->sampler; prog->SamplerTargets[inst->sampler] = @@ -2648,7 +2549,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) /* look for instructions which read from varying vars */ foreach_iter(exec_list_iterator, iter, this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - const GLuint numSrc = _mesa_num_inst_src_regs(inst->op); + const GLuint numSrc = num_inst_src_regs(inst->op); GLuint j; for (j = 0; j < numSrc; j++) { if (inst->src[j].file == type) { @@ -2687,7 +2588,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]); st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW); dst.index = i; - this->emit(NULL, OPCODE_MOV, dst, src); + this->emit(NULL, TGSI_OPCODE_MOV, dst, src); } } } @@ 
-2700,7 +2601,7 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); unsigned j; - for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { inst->src[j].index = new_index; @@ -2723,17 +2624,17 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) foreach_iter(exec_list_iterator, iter, this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { return (depth == 0) ? i : loop_start; } } - if (inst->op == OPCODE_BGNLOOP) { + if (inst->op == TGSI_OPCODE_BGNLOOP) { if(depth++ == 0) loop_start = i; - } else if (inst->op == OPCODE_ENDLOOP) { + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { if (--depth == 0) loop_start = -1; } @@ -2759,10 +2660,10 @@ glsl_to_tgsi_visitor::get_first_temp_write(int index) return (depth == 0) ? i : loop_start; } - if (inst->op == OPCODE_BGNLOOP) { + if (inst->op == TGSI_OPCODE_BGNLOOP) { if(depth++ == 0) loop_start = i; - } else if (inst->op == OPCODE_ENDLOOP) { + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { if (--depth == 0) loop_start = -1; } @@ -2784,16 +2685,16 @@ glsl_to_tgsi_visitor::get_last_temp_read(int index) foreach_iter(exec_list_iterator, iter, this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { last = (depth == 0) ? 
i : -2; } } - if (inst->op == OPCODE_BGNLOOP) + if (inst->op == TGSI_OPCODE_BGNLOOP) depth++; - else if (inst->op == OPCODE_ENDLOOP) + else if (inst->op == TGSI_OPCODE_ENDLOOP) if (--depth == 0 && last == -2) last = i; assert(depth >= 0); @@ -2818,9 +2719,9 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index) if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) last = (depth == 0) ? i : -2; - if (inst->op == OPCODE_BGNLOOP) + if (inst->op == TGSI_OPCODE_BGNLOOP) depth++; - else if (inst->op == OPCODE_ENDLOOP) + else if (inst->op == TGSI_OPCODE_ENDLOOP) if (--depth == 0 && last == -2) last = i; assert(depth >= 0); @@ -2922,18 +2823,18 @@ glsl_to_tgsi_visitor::copy_propagate(void) } switch (inst->op) { - case OPCODE_BGNLOOP: - case OPCODE_ENDLOOP: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: /* End of a basic block, clear the ACP entirely. */ memset(acp, 0, sizeof(*acp) * this->next_temp * 4); break; - case OPCODE_IF: + case TGSI_OPCODE_IF: ++level; break; - case OPCODE_ENDIF: - case OPCODE_ELSE: + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_ELSE: /* Clear all channels written inside the block from the ACP, but * leaving those that were not touched. */ @@ -2946,7 +2847,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) acp[4 * r + c] = NULL; } } - if (inst->op == OPCODE_ENDIF) + if (inst->op == TGSI_OPCODE_ENDIF) --level; break; @@ -3005,7 +2906,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) } /* If this is a copy, add it to the ACP. */ - if (inst->op == OPCODE_MOV && + if (inst->op == TGSI_OPCODE_MOV && inst->dst.file == PROGRAM_TEMPORARY && !inst->dst.reladdr && !inst->saturate && @@ -3337,11 +3238,11 @@ src_register( struct st_translate *t, } /** - * Create a TGSI ureg_dst register from a Mesa dest register. + * Create a TGSI ureg_dst register from an st_dst_reg. 
*/ static struct ureg_dst translate_dst( struct st_translate *t, - const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg, + const st_dst_reg *dst_reg, boolean saturate ) { struct ureg_dst dst = dst_register( t, @@ -3361,7 +3262,7 @@ translate_dst( struct st_translate *t, } /** - * Create a TGSI ureg_src register from a Mesa src register. + * Create a TGSI ureg_src register from an st_src_reg. */ static struct ureg_src translate_src( struct st_translate *t, @@ -3378,12 +3279,6 @@ translate_src( struct st_translate *t, if ((src_reg->negate & 0xf) == NEGATE_XYZW) src = ureg_negate(src); -#if 0 - // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR - if (src_reg->abs) - src = ureg_abs(src); -#endif - if (src_reg->reladdr != NULL) { /* Normally ureg_src_indirect() would be used here, but a stupid compiler * bug in g++ makes ureg_src_indirect (an inline C function) erroneously @@ -3421,77 +3316,64 @@ compile_tgsi_instruction(struct st_translate *t, unsigned num_dst; unsigned num_src; - num_dst = _mesa_num_inst_dst_regs( inst->op ); - num_src = _mesa_num_inst_src_regs( inst->op ); + num_dst = num_inst_dst_regs( inst->op ); + num_src = num_inst_src_regs( inst->op ); if (num_dst) dst[0] = translate_dst( t, &inst->dst, - inst->saturate); // inst->SaturateMode + inst->saturate); for (i = 0; i < num_src; i++) src[i] = translate_src( t, &inst->src[i] ); switch( inst->op ) { - case OPCODE_SWZ: - // TODO: copy emit_swz function from st_mesa_to_tgsi.c - //emit_swz( t, dst[0], &inst->src[0] ); - assert(!"OPCODE_SWZ"); - return; - - case OPCODE_BGNLOOP: - case OPCODE_CAL: - case OPCODE_ELSE: - case OPCODE_ENDLOOP: - case OPCODE_IF: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_IF: debug_assert(num_dst == 0); ureg_label_insn( ureg, - translate_opcode( inst->op ), + inst->op, src, num_src, get_label( t, - inst->op == OPCODE_CAL ? 
inst->function->sig_id : 0 )); + inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 )); return; - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXD: - case OPCODE_TXL: - case OPCODE_TXP: + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: src[num_src++] = t->samplers[inst->sampler]; ureg_tex_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, translate_texture_target( inst->tex_target, inst->tex_shadow ), src, num_src ); return; - case OPCODE_SCS: + case TGSI_OPCODE_SCS: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); ureg_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, src, num_src ); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); ureg_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, src, num_src ); break; - case OPCODE_NOISE1: - case OPCODE_NOISE2: - case OPCODE_NOISE3: - case OPCODE_NOISE4: - assert(!"OPCODE_NOISE should have been lowered\n"); - break; - default: ureg_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, src, num_src ); break; @@ -3993,9 +3875,8 @@ get_mesa_program(struct gl_context *ctx, add_uniforms_to_parameters_list(shader_program, shader, prog); - /* Emit Mesa IR for main(). */ + /* Emit intermediate IR for main(). */ visit_exec_list(shader->ir, v); - v->emit(NULL, OPCODE_END); /* Now emit bodies for any functions that were used. 
*/ do { @@ -4007,18 +3888,18 @@ get_mesa_program(struct gl_context *ctx, if (!entry->bgn_inst) { v->current_function = entry; - entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB); + entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); entry->bgn_inst->function = entry; visit_exec_list(&entry->sig->body, v); glsl_to_tgsi_instruction *last; last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); - if (last->op != OPCODE_RET) - v->emit(NULL, OPCODE_RET); + if (last->op != TGSI_OPCODE_RET) + v->emit(NULL, TGSI_OPCODE_RET); glsl_to_tgsi_instruction *end; - end = v->emit(NULL, OPCODE_ENDSUB); + end = v->emit(NULL, TGSI_OPCODE_ENDSUB); end->function = entry; progress = GL_TRUE; @@ -4050,6 +3931,9 @@ get_mesa_program(struct gl_context *ctx, v->eliminate_dead_code(); v->merge_registers(); v->renumber_registers(); + + /* Write the END instruction. */ + v->emit(NULL, TGSI_OPCODE_END); if (ctx->Shader.Flags & GLSL_DUMP) { printf("\n"); @@ -4127,8 +4011,8 @@ st_new_shader_program(struct gl_context *ctx, GLuint name) /** * Link a shader. * Called via ctx->Driver.LinkShader() - * This actually involves converting GLSL IR into Mesa gl_programs with - * code lowering and other optimizations. + * This actually involves converting GLSL IR into an intermediate TGSI-like IR + * with code lowering and other optimizations. */ GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) From 16d7a717d592524e4d62fec4173cb9523f7a1453 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 2 May 2011 23:12:18 -0500 Subject: [PATCH 149/600] glsl_to_tgsi: fix shaders with indirect addressing of temps Fixes several Piglit tests, although it's a step backwards for optimization. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 4cb2f377e98..75ab9c5de7c 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -485,7 +485,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, else { for (i=0; i<3; i++) { if(inst->src[i].reladdr) { - switch(dst.file) { + switch(inst->src[i].file) { case PROGRAM_TEMPORARY: this->indirect_addr_temps = true; break; @@ -3928,9 +3928,17 @@ get_mesa_program(struct gl_context *ctx, /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->copy_propagate(); - v->eliminate_dead_code(); - v->merge_registers(); - v->renumber_registers(); + + /* FIXME: These passes to optimize temporary registers don't work when there + * is indirect addressing of the temporary register space. We need proper + * array support so that we don't have to give up these passes in every + * shader that uses arrays. + */ + if (!v->indirect_addr_temps) { + v->merge_registers(); + v->eliminate_dead_code(); + v->renumber_registers(); + } /* Write the END instruction. 
*/ v->emit(NULL, TGSI_OPCODE_END); From 17b695e6e7dd730497fb60a8e161935b23fa0e9c Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 5 May 2011 21:10:28 -0500 Subject: [PATCH 150/600] gallium: add PIPE_SHADER_CAP_INTEGERS --- src/gallium/auxiliary/tgsi/tgsi_exec.h | 2 ++ src/gallium/drivers/i915/i915_screen.c | 2 ++ src/gallium/drivers/i965/brw_screen.c | 2 ++ src/gallium/drivers/nv50/nv50_screen.c | 2 ++ src/gallium/drivers/nvc0/nvc0_screen.c | 2 ++ src/gallium/drivers/nvfx/nvfx_screen.c | 2 ++ src/gallium/drivers/r300/r300_screen.c | 2 ++ src/gallium/drivers/r600/r600_pipe.c | 2 ++ src/gallium/drivers/svga/svga_screen.c | 2 ++ src/gallium/include/pipe/p_defines.h | 1 + 10 files changed, 19 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 33f33aa82c7..6c32ccff323 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -400,6 +400,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 1; default: return 0; } diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index c86baa58b28..5b3af2519fc 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -222,6 +222,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap); return 0; diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 9178dfa8f69..39e9e2fa6ac 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -243,6 +243,8 @@ brw_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shad return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 
1; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: assert(0); return 0; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index cc921d08666..7e436fd47d8 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -180,6 +180,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; /* please inline, or provide function declarations */ + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 34bf0f0a2ad..52143981500 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -167,6 +167,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; /* please inline, or provide function declarations */ + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 475138c3c32..d880b12fcaa 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -174,6 +174,8 @@ nvfx_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: break; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index fae03acb6d1..93baba68150 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -256,6 +256,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case 
PIPE_SHADER_CAP_INTEGERS: + return 0; default: break; } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 16fe6c54a15..2d744137522 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -504,6 +504,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: return 0; } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index b847cf331b3..4be10ef5821 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -286,6 +286,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: break; } diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index c0c2a7c7fd2..2c95c204e5b 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -491,6 +491,7 @@ enum pipe_shader_cap PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR = 14, PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15, PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */ + PIPE_SHADER_CAP_INTEGERS = 17, }; From 6d89abadbcd68bbe9e08f041412549f8dc1fc73c Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 17 May 2011 17:13:20 -0500 Subject: [PATCH 151/600] mesa: support boolean and integer-based parameters in prog_parameter The functionality is not used by anything yet, and the glUniform functions will need to be reworked before this can reach its full usefulness. It is nonetheless a step towards integer support in the state tracker and classic drivers. 
--- src/mesa/main/ff_fragment_shader.cpp | 3 +- src/mesa/main/ffvertex_prog.c | 10 ++--- src/mesa/main/uniforms.c | 12 +++--- src/mesa/program/ir_to_mesa.cpp | 8 ++-- src/mesa/program/nvfragparse.c | 23 ++++++---- src/mesa/program/prog_execute.c | 2 +- src/mesa/program/prog_parameter.c | 50 +++++++++++----------- src/mesa/program/prog_parameter.h | 25 +++++++---- src/mesa/program/prog_parameter_layout.c | 2 +- src/mesa/program/prog_print.c | 2 +- src/mesa/program/program.c | 3 +- src/mesa/program/sampler.cpp | 2 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 +++--- 13 files changed, 88 insertions(+), 66 deletions(-) diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 0b53c28f7ae..2ccbaf8f8c3 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -875,7 +875,8 @@ static struct ureg register_const4f( struct texenv_fragment_program *p, values[1] = s1; values[2] = s2; values[3] = s3; - idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, + idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, + (gl_constant_value *) values, 4, &swizzle ); r = make_ureg(PROGRAM_CONSTANT, idx); r.swz = swizzle; diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index b8e49a3757f..2d2485c9e06 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -455,13 +455,13 @@ static struct ureg register_const4f( struct tnl_program *p, GLfloat s2, GLfloat s3) { - GLfloat values[4]; + gl_constant_value values[4]; GLint idx; GLuint swizzle; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; + values[0].f = s0; + values[1].f = s1; + values[2].f = s2; + values[3].f = s3; idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, &swizzle ); ASSERT(swizzle == SWIZZLE_NOOP); diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 1c4fd82baac..07d46c6404f 100644 --- 
a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -429,7 +429,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = prog->Parameters->ParameterValues[base][j]; + params[k++] = prog->Parameters->ParameterValues[base][j].f; } } } @@ -442,7 +442,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLdouble) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -455,7 +455,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLint) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -468,7 +468,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLuint) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -670,7 +670,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, /* loop over number of samplers to change */ for (i = 0; i < count; i++) { GLuint sampler = (GLuint) - program->Parameters->ParameterValues[index + offset + i][0]; + program->Parameters->ParameterValues[index+offset + i][0].f; GLuint texUnit = ((GLuint *) values)[i]; /* check that the sampler (tex unit index) is legal */ @@ -936,7 +936,7 @@ set_program_uniform_matrix(struct gl_context *ctx, struct gl_program *program, /* Ignore writes beyond the end of (the used part of) an array */ return; } - v = program->Parameters->ParameterValues[index + offset]; + v = (GLfloat *) program->Parameters->ParameterValues[index + offset]; for (row = 0; row < rows; row++) { if (transpose) { v[row] = 
values[src + row * cols + col]; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 00869979dd8..f27492749bd 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -599,7 +599,7 @@ ir_to_mesa_visitor::src_reg_for_float(float val) src_reg src(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &val, 1, &src.swizzle); + (const gl_constant_value *)&val, 1, &src.swizzle); return src; } @@ -1798,7 +1798,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir) src = src_reg(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &src.swizzle); emit(ir, OPCODE_MOV, mat_column, src); @@ -1836,7 +1836,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir) this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &this->result.swizzle); } @@ -2533,7 +2533,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, */ if (file == PROGRAM_SAMPLER) { for (unsigned int j = 0; j < size / 4; j++) - prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; } /* The location chosen in the Parameters list here (returned diff --git a/src/mesa/program/nvfragparse.c b/src/mesa/program/nvfragparse.c index 8516b5fc1ff..ce72c610d89 100644 --- a/src/mesa/program/nvfragparse.c +++ b/src/mesa/program/nvfragparse.c @@ -472,8 +472,9 @@ Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number) const GLfloat *constant; if (!Parse_Identifier(parseState, ident)) RETURN_ERROR1("Expected an identifier"); - constant = _mesa_lookup_parameter_value(parseState->parameters, - -1, (const char *) ident); + constant = (GLfloat 
*)_mesa_lookup_parameter_value(parseState->parameters, + -1, + (const char *) ident); /* XXX Check that it's a constant and not a parameter */ if (!constant) { RETURN_ERROR1("Undefined symbol"); @@ -1039,7 +1040,8 @@ Parse_VectorSrc(struct parse_state *parseState, if (!Parse_ScalarConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1051,7 +1053,8 @@ Parse_VectorSrc(struct parse_state *parseState, if (!Parse_VectorConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1145,7 +1148,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (!Parse_VectorConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1170,7 +1174,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (!Parse_ScalarConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->Index = paramIndex; srcReg->File = PROGRAM_NAMED_PARAM; needSuffix = GL_FALSE; @@ -1296,7 +1301,8 @@ Parse_InstructionSequence(struct parse_state *parseState, RETURN_ERROR2(id, "already defined"); } _mesa_add_named_parameter(parseState->parameters, - (const char *) id, value); + (const char *) id, + (gl_constant_value *) value); } else if (Parse_String(parseState, "DECLARE")) { GLubyte id[100]; @@ -1315,7 +1321,8 @@ Parse_InstructionSequence(struct parse_state *parseState, RETURN_ERROR2(id, "already declared"); } 
_mesa_add_named_parameter(parseState->parameters, - (const char *) id, value); + (const char *) id, + (gl_constant_value *) value); } else if (Parse_String(parseState, "END")) { inst->Opcode = OPCODE_END; diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index e7553c69dbe..dbfd1b91875 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -157,7 +157,7 @@ get_src_register_pointer(const struct prog_src_register *source, case PROGRAM_NAMED_PARAM: if (reg >= (GLint) prog->Parameters->NumParameters) return ZeroVec; - return prog->Parameters->ParameterValues[reg]; + return (GLfloat *) prog->Parameters->ParameterValues[reg]; case PROGRAM_SYSTEM_VALUE: assert(reg < Elements(machine->SystemValues)); diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index 3570cab118b..b1cdf8bf2c0 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -56,8 +56,8 @@ _mesa_new_parameter_list_sized(unsigned size) p->Parameters = (struct gl_program_parameter *) calloc(1, size * sizeof(struct gl_program_parameter)); - p->ParameterValues = (GLfloat (*)[4]) - _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16); + p->ParameterValues = (gl_constant_value (*)[4]) + _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16); if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) { @@ -101,14 +101,15 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList) * \param name the parameter name, will be duplicated/copied! * \param size number of elements in 'values' vector (1..4, or more) * \param datatype GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE. 
- * \param values initial parameter value, up to 4 GLfloats, or NULL + * \param values initial parameter value, up to 4 gl_constant_values, or NULL * \param state state indexes, or NULL * \return index of new parameter in the list, or -1 if error (out of mem) */ GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, gl_register_file type, const char *name, - GLuint size, GLenum datatype, const GLfloat *values, + GLuint size, GLenum datatype, + const gl_constant_value *values, const gl_state_index state[STATE_LENGTH], GLbitfield flags) { @@ -127,10 +128,10 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, oldNum * sizeof(struct gl_program_parameter), paramList->Size * sizeof(struct gl_program_parameter)); - paramList->ParameterValues = (GLfloat (*)[4]) + paramList->ParameterValues = (gl_constant_value (*)[4]) _mesa_align_realloc(paramList->ParameterValues, /* old buf */ - oldNum * 4 * sizeof(GLfloat), /* old size */ - paramList->Size * 4 *sizeof(GLfloat), /* new sz */ + oldNum * 4 * sizeof(gl_constant_value),/* old sz */ + paramList->Size*4*sizeof(gl_constant_value),/*new*/ 16); } @@ -142,7 +143,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, return -1; } else { - GLuint i; + GLuint i, j; paramList->NumParameters = oldNum + sz4; @@ -163,7 +164,8 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, } else { /* silence valgrind */ - ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0); + for (j = 0; j < 4; j++) + paramList->ParameterValues[oldNum + i][j].f = 0; } size -= 4; } @@ -184,7 +186,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4]) + const char *name, const gl_constant_value values[4]) { return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name, 4, GL_NONE, values, NULL, 0x0); @@ -204,17 +206,17 @@ _mesa_add_named_parameter(struct 
gl_program_parameter_list *paramList, */ GLint _mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4], + const char *name, const gl_constant_value values[4], GLuint size) { /* first check if this is a duplicate constant */ GLint pos; for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) { - const GLfloat *pvals = paramList->ParameterValues[pos]; - if (pvals[0] == values[0] && - pvals[1] == values[1] && - pvals[2] == values[2] && - pvals[3] == values[3] && + const gl_constant_value *pvals = paramList->ParameterValues[pos]; + if (pvals[0].u == values[0].u && + pvals[1].u == values[1].u && + pvals[2].u == values[2].u && + pvals[3].u == values[3].u && strcmp(paramList->Parameters[pos].Name, name) == 0) { /* Same name and value is already in the param list - reuse it */ return pos; @@ -240,7 +242,7 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, - const GLfloat values[4], GLuint size, + const gl_constant_value values[4], GLuint size, GLuint *swizzleOut) { GLint pos; @@ -262,7 +264,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, struct gl_program_parameter *p = paramList->Parameters + pos; if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) { /* ok, found room */ - GLfloat *pVal = paramList->ParameterValues[pos]; + gl_constant_value *pVal = paramList->ParameterValues[pos]; GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */ pVal[p->Size] = values[0]; p->Size++; @@ -401,7 +403,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, * Lookup a parameter value by name in the given parameter list. * \return pointer to the float[4] values. 
*/ -GLfloat * +gl_constant_value * _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name) { @@ -465,7 +467,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList, */ GLboolean _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, - const GLfloat v[], GLuint vSize, + const gl_constant_value v[], GLuint vSize, GLint *posOut, GLuint *swizzleOut) { GLuint i; @@ -484,7 +486,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, /* swizzle not allowed */ GLuint j, match = 0; for (j = 0; j < vSize; j++) { - if (v[j] == list->ParameterValues[i][j]) + if (v[j].u == list->ParameterValues[i][j].u) match++; } if (match == vSize) { @@ -498,7 +500,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, /* look for v[0] anywhere within float[4] value */ GLuint j; for (j = 0; j < list->Parameters[i].Size; j++) { - if (list->ParameterValues[i][j] == v[0]) { + if (list->ParameterValues[i][j].u == v[0].u) { /* found it */ *posOut = i; *swizzleOut = MAKE_SWIZZLE4(j, j, j, j); @@ -511,13 +513,13 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, GLuint swz[4]; GLuint match = 0, j, k; for (j = 0; j < vSize; j++) { - if (v[j] == list->ParameterValues[i][j]) { + if (v[j].u == list->ParameterValues[i][j].u) { swz[j] = j; match++; } else { for (k = 0; k < list->Parameters[i].Size; k++) { - if (v[j] == list->ParameterValues[i][k]) { + if (v[j].u == list->ParameterValues[i][k].u) { swz[j] = k; match++; break; diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index 10cbbe57a6c..dcc171ed745 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -46,7 +46,15 @@ #define PROG_PARAM_BIT_CYL_WRAP 0x10 /**< XXX gallium debug */ /*@}*/ - +/** + * Actual data for constant values of parameters. 
+ */ +typedef union gl_constant_value { + GLfloat f; + GLboolean b; + GLint i; + GLuint u; +} gl_constant_value; /** * Program parameter. @@ -81,7 +89,7 @@ struct gl_program_parameter_list GLuint Size; /**< allocated size of Parameters, ParameterValues */ GLuint NumParameters; /**< number of parameters in arrays */ struct gl_program_parameter *Parameters; /**< Array [Size] */ - GLfloat (*ParameterValues)[4]; /**< Array [Size] of GLfloat[4] */ + gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */ GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes might invalidate ParameterValues[] */ }; @@ -112,22 +120,23 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list) extern GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, gl_register_file type, const char *name, - GLuint size, GLenum datatype, const GLfloat *values, + GLuint size, GLenum datatype, + const gl_constant_value *values, const gl_state_index state[STATE_LENGTH], GLbitfield flags); extern GLint _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4]); + const char *name, const gl_constant_value values[4]); extern GLint _mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4], + const char *name, const gl_constant_value values[4], GLuint size); extern GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, - const GLfloat values[4], GLuint size, + const gl_constant_value values[4], GLuint size, GLuint *swizzleOut); extern GLint @@ -143,7 +152,7 @@ extern GLint _mesa_add_state_reference(struct gl_program_parameter_list *paramList, const gl_state_index stateTokens[STATE_LENGTH]); -extern GLfloat * +extern gl_constant_value * _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name); @@ -153,7 +162,7 @@ _mesa_lookup_parameter_index(const struct 
gl_program_parameter_list *paramList, extern GLboolean _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, - const GLfloat v[], GLuint vSize, + const gl_constant_value v[], GLuint vSize, GLint *posOut, GLuint *swizzleOut); extern GLuint diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c index 90a9771080c..28fca3b92d9 100644 --- a/src/mesa/program/prog_parameter_layout.c +++ b/src/mesa/program/prog_parameter_layout.c @@ -182,7 +182,7 @@ _mesa_layout_parameters(struct asm_parser_state *state) switch (p->Type) { case PROGRAM_CONSTANT: { - const float *const v = + const gl_constant_value *const v = state->prog->Parameters->ParameterValues[idx]; inst->Base.SrcReg[i].Index = diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index 7c3b4909e73..70412b1fa6a 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -985,7 +985,7 @@ _mesa_fprint_parameter_list(FILE *f, fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags); for (i = 0; i < list->NumParameters; i++){ struct gl_program_parameter *param = list->Parameters + i; - const GLfloat *v = list->ParameterValues[i]; + const GLfloat *v = (GLfloat *) list->ParameterValues[i]; fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}", i, param->Size, _mesa_register_file_name(list->Parameters[i].Type), diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 224446a2683..4f2b6270501 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -1030,7 +1030,8 @@ _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog) GLuint i; GLuint whiteSwizzle; GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters, - white, 4, &whiteSwizzle); + (gl_constant_value *) white, + 4, &whiteSwizzle); (void) whiteIndex; diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index 1457d1199fa..e8d34c670a9 100644 --- a/src/mesa/program/sampler.cpp 
+++ b/src/mesa/program/sampler.cpp @@ -132,6 +132,6 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler, index += getname.offset; - return prog->Parameters->ParameterValues[index][0]; + return prog->Parameters->ParameterValues[index][0].f; } } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 75ab9c5de7c..881b9e05de1 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -706,9 +706,11 @@ struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { st_src_reg src(PROGRAM_CONSTANT, -1, NULL); + union gl_constant_value uval; + uval.f = val; src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &val, 1, &src.swizzle); + &uval, 1, &src.swizzle); return src; } @@ -1791,7 +1793,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src = st_src_reg(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); @@ -1829,7 +1831,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &this->result.swizzle); } @@ -2401,7 +2403,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, */ if (file == PROGRAM_SAMPLER) { for (unsigned int j = 0; j < size / 4; j++) - prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; } /* The location chosen in the Parameters list here (returned @@ -3762,7 +3764,7 @@ st_translate_program( else t->constants[i] = ureg_DECL_immediate( ureg, - proginfo->Parameters->ParameterValues[i], + (GLfloat *) proginfo->Parameters->ParameterValues[i], 4 ); break; default: 
From f95169deb40f8245f4b3b07b17b222746da29bdd Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 13 Jun 2011 17:52:54 -0500 Subject: [PATCH 152/600] tgsi: add support for TGSI_OPCODE_MOD in tgsi_exec --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 9cf74a838fe..072772eaa7e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2977,6 +2977,17 @@ micro_xor(union tgsi_exec_channel *dst, dst->u[3] = src0->u[3] ^ src1->u[3]; } +static void +micro_mod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->i[0] = src0->i[0] % src1->i[0]; + dst->i[1] = src0->i[1] % src1->i[1]; + dst->i[2] = src0->i[2] % src1->i[2]; + dst->i[3] = src0->i[3] % src1->i[3]; +} + static void micro_f2i(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -3680,7 +3691,7 @@ exec_instruction( break; case TGSI_OPCODE_MOD: - assert (0); + exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_XOR: From b191382c60bdcfeb7f424b23aa6ab63de81e2f08 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 13 Jun 2011 18:12:56 -0500 Subject: [PATCH 153/600] mesa, glsl_to_tgsi: add native support for integers in shaders Disabled by default on all drivers. To enable it, change ctx->GLSLVersion to 130 in st_extensions.c. Currently, softpipe is the only driver with integer support. 
--- src/glsl/glsl_types.h | 15 + src/mesa/main/uniforms.c | 38 ++- src/mesa/program/prog_parameter.c | 28 +- src/mesa/program/prog_parameter.h | 5 + src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 302 ++++++++++++++++++--- 5 files changed, 328 insertions(+), 60 deletions(-) diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 1b069df74fe..eb9d501858a 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -165,6 +165,21 @@ struct glsl_type { static const glsl_type *const mat4x3_type; static const glsl_type *const mat4_type; /*@}*/ + + /** + * Get the built-in instance of the vec4 type for a specific base type + */ + static const glsl_type *get_vec4_type(glsl_base_type base_type) + { + if (base_type == GLSL_TYPE_FLOAT) + return vec4_type; + else if (base_type == GLSL_TYPE_INT) + return ivec4_type; + else if (base_type == GLSL_TYPE_UINT) + return uvec4_type; + else + return NULL; + } /** diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 07d46c6404f..ce4863faf78 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -454,8 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = (GLint) - prog->Parameters->ParameterValues[base][j].f; + params[k++] = ctx->Const.GLSLVersion <= 120 ? + (GLint) prog->Parameters->ParameterValues[base][j].f : + prog->Parameters->ParameterValues[base][j].i; } } } @@ -467,8 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = (GLuint) - prog->Parameters->ParameterValues[base][j].f; + params[k++] = ctx->Const.GLSLVersion <= 120 ? 
+ (GLuint) prog->Parameters->ParameterValues[base][j].f : + prog->Parameters->ParameterValues[base][j].u; } } } @@ -735,42 +737,52 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, /* loop over number of array elements */ for (k = 0; k < count; k++) { - GLfloat *uniformVal; + gl_constant_value *uniformVal; if (offset + k >= slots) { /* Extra array data is ignored */ break; } - /* uniformVal (the destination) is always float[4] */ + /* uniformVal (the destination) is always gl_constant_value[4] */ uniformVal = program->Parameters->ParameterValues[index + offset + k]; if (basicType == GL_INT) { - /* convert user's ints to floats */ const GLint *iValues = ((const GLint *) values) + k * elems; for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + if (ctx->Const.GLSLVersion <= 120) + uniformVal[i].f = (GLfloat) iValues[i]; + else + uniformVal[i].i = iValues[i]; } } else if (basicType == GL_UNSIGNED_INT) { - /* convert user's uints to floats */ const GLuint *iValues = ((const GLuint *) values) + k * elems; for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + if (ctx->Const.GLSLVersion <= 120) + uniformVal[i].f = (GLfloat)(GLuint) iValues[i]; + else + uniformVal[i].u = iValues[i]; } } else { const GLfloat *fValues = ((const GLfloat *) values) + k * elems; assert(basicType == GL_FLOAT); for (i = 0; i < elems; i++) { - uniformVal[i] = fValues[i]; + uniformVal[i].f = fValues[i]; } } - /* if the uniform is bool-valued, convert to 1.0 or 0.0 */ + /* if the uniform is bool-valued, convert to 1 or 0 */ if (isUniformBool) { for (i = 0; i < elems; i++) { - uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f; + if (basicType == GL_FLOAT) + uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0; + else + uniformVal[i].b = uniformVal[i].u ? 1 : 0; + + if (ctx->Const.GLSLVersion <= 120) + uniformVal[i].f = uniformVal[i].b ? 
1.0f : 0.0f; } } } diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index b1cdf8bf2c0..49b3ffbdd5c 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -241,9 +241,9 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, * \return index/position of the new parameter in the parameter list. */ GLint -_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, +_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, const gl_constant_value values[4], GLuint size, - GLuint *swizzleOut) + GLenum datatype, GLuint *swizzleOut) { GLint pos; ASSERT(size >= 1); @@ -276,7 +276,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, /* add a new parameter to store this constant */ pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL, - size, GL_NONE, values, NULL, 0x0); + size, datatype, values, NULL, 0x0); if (pos >= 0 && swizzleOut) { if (size == 1) *swizzleOut = SWIZZLE_XXXX; @@ -286,6 +286,28 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, return pos; } +/** + * Add a new unnamed constant to the parameter list. This will be used + * when a fragment/vertex program contains something like this: + * MOV r, { 0, 1, 2, 3 }; + * If swizzleOut is non-null we'll search the parameter list for an + * existing instance of the constant which matches with a swizzle. + * + * \param paramList the parameter list + * \param values four float values + * \param swizzleOut returns swizzle mask for accessing the constant + * \return index/position of the new parameter in the parameter list. 
+ * \sa _mesa_add_typed_unnamed_constant + */ +GLint +_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLuint *swizzleOut) +{ + return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE, + swizzleOut); +} + /** * Add parameter representing a varying variable. */ diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index dcc171ed745..f858cf0fa0d 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -134,6 +134,11 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, const char *name, const gl_constant_value values[4], GLuint size); +extern GLint +_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLenum datatype, GLuint *swizzleOut); + extern GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, const gl_constant_value values[4], GLuint size, diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 881b9e05de1..3f5c0c60226 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -96,11 +96,13 @@ public: else this->swizzle = SWIZZLE_XYZW; this->negate = 0; + this->type = type ? type->base_type : GLSL_TYPE_ERROR; this->reladdr = NULL; } - st_src_reg(gl_register_file file, int index) + st_src_reg(gl_register_file file, int index, int type) { + this->type = type; this->file = file; this->index = index; this->swizzle = SWIZZLE_XYZW; @@ -110,6 +112,7 @@ public: st_src_reg() { + this->type = GLSL_TYPE_ERROR; this->file = PROGRAM_UNDEFINED; this->index = 0; this->swizzle = 0; @@ -123,23 +126,26 @@ public: int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. 
*/ int negate; /**< NEGATE_XYZW mask from mesa */ + int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; }; class st_dst_reg { public: - st_dst_reg(gl_register_file file, int writemask) + st_dst_reg(gl_register_file file, int writemask, int type) { this->file = file; this->index = 0; this->writemask = writemask; this->cond_mask = COND_TR; this->reladdr = NULL; + this->type = type; } st_dst_reg() { + this->type = GLSL_TYPE_ERROR; this->file = PROGRAM_UNDEFINED; this->index = 0; this->writemask = 0; @@ -153,12 +159,14 @@ public: int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ GLuint cond_mask:4; + int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; }; st_src_reg::st_src_reg(st_dst_reg reg) { + this->type = reg.type; this->file = reg.file; this->index = reg.index; this->swizzle = SWIZZLE_XYZW; @@ -168,6 +176,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) st_dst_reg::st_dst_reg(st_src_reg reg) { + this->type = reg.type; this->file = reg.file; this->index = reg.index; this->writemask = WRITEMASK_XYZW; @@ -267,6 +276,8 @@ public: int samplers_used; bool indirect_addr_temps; bool indirect_addr_consts; + + int glsl_version; variable_storage *find_variable_storage(ir_variable *var); @@ -276,6 +287,8 @@ public: void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); st_src_reg st_src_reg_for_float(float val); + st_src_reg st_src_reg_for_int(int val); + st_src_reg st_src_reg_for_type(int type, int val); /** * \name Visit methods @@ -327,6 +340,10 @@ public: glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2); + + unsigned get_opcode(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg 
src1); /** * Emit the correct dot-product instruction for the type of arguments @@ -343,6 +360,8 @@ public: void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); + void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); + void emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src); @@ -372,9 +391,9 @@ public: static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL); -static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); +static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); -static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); +static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); static void fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); @@ -432,6 +451,8 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, { glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); int num_reladdr = 0, i; + + op = get_opcode(ir, op, dst, src0, src1); /* If we have to do relative addressing, we want to load the ARL * reg directly for one of the regs, and preload the other reladdr @@ -447,7 +468,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, reladdr_to_temp(ir, &src0, &num_reladdr); if (dst.reladdr) { - emit(ir, TGSI_OPCODE_ARL, address_reg, *dst.reladdr); + emit_arl(ir, address_reg, *dst.reladdr); num_reladdr--; } assert(num_reladdr == 0); @@ -531,6 +552,62 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } +/** + * Determines whether to use an integer, unsigned integer, or float opcode + * based on the operands and input opcode, then emits the result. 
+ * + * TODO: type checking for remaining TGSI opcodes + */ +unsigned +glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1) +{ + int type = GLSL_TYPE_FLOAT; + + if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) + type = GLSL_TYPE_FLOAT; + else if (glsl_version >= 130) + type = src0.type; + +#define case4(c, f, i, u) \ + case TGSI_OPCODE_##c: \ + if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ + else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ + else op = TGSI_OPCODE_##f; \ + break; +#define case3(f, i, u) case4(f, f, i, u) +#define case2fi(f, i) case4(f, f, i, i) +#define case2iu(i, u) case4(i, LAST, i, u) + + switch(op) { + case2fi(ADD, UADD); + case2fi(MUL, UMUL); + case2fi(MAD, UMAD); + case3(DIV, IDIV, UDIV); + case3(MAX, IMAX, UMAX); + case3(MIN, IMIN, UMIN); + case2iu(MOD, UMOD); + + case2fi(SEQ, USEQ); + case2fi(SNE, USNE); + case3(SGE, ISGE, USGE); + case3(SLT, ISLT, USLT); + + case2iu(SHL, SHL); + case2iu(ISHR, USHR); + case2iu(NOT, NOT); + case2iu(AND, AND); + case2iu(OR, OR); + case2iu(XOR, XOR); + + default: break; + } + + assert(op != TGSI_OPCODE_LAST); + return op; +} + void glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, st_dst_reg dst, st_src_reg src0, st_src_reg src1, @@ -607,6 +684,22 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, emit_scalar(ir, op, dst, src0, undef); } +void +glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, + st_dst_reg dst, st_src_reg src0) +{ + st_src_reg tmp = get_temp(glsl_type::float_type); + + if (src0.type == GLSL_TYPE_INT) + emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); + else if (src0.type == GLSL_TYPE_UINT) + emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); + else + tmp = src0; + + emit(ir, TGSI_OPCODE_ARL, dst, tmp); +} + /** * Emit an TGSI_OPCODE_SCS instruction * @@ -705,16 +798,41 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, struct st_src_reg 
glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { - st_src_reg src(PROGRAM_CONSTANT, -1, NULL); + st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT); union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &uval, 1, &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + &uval, 1, GL_FLOAT, &src.swizzle); return src; } +struct st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_int(int val) +{ + st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT); + union gl_constant_value uval; + + assert(glsl_version >= 130); + + uval.i = val; + src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + &uval, 1, GL_INT, &src.swizzle); + + return src; +} + +struct st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) +{ + if (glsl_version >= 130) + return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : + st_src_reg_for_int(val); + else + return st_src_reg_for_float(val); +} + static int type_size(const struct glsl_type *type) { @@ -759,8 +877,7 @@ type_size(const struct glsl_type *type) /** * In the initial pass of codegen, we assign temporary numbers to * intermediate results. (not SSA -- variable assignments will reuse - * storage). Actual register allocation for the Mesa VM occurs in a - * pass over the Mesa IR later. + * storage). */ st_src_reg glsl_to_tgsi_visitor::get_temp(const glsl_type *type) @@ -769,6 +886,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) int swizzle[4]; int i; + src.type = type->base_type; src.file = PROGRAM_TEMPORARY; src.index = next_temp; src.reladdr = NULL; @@ -875,7 +993,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) this->variables.push_tail(storage); this->next_temp += type_size(ir->type); - dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); + dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, + glsl_version >= 130 ? 
ir->type->base_type : GLSL_TYPE_FLOAT)); } @@ -890,7 +1009,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) assert(index == storage->index + (int)i); } } else { - st_src_reg src(PROGRAM_STATE_VAR, index, NULL); + st_src_reg src(PROGRAM_STATE_VAR, index, + glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT); src.swizzle = slots[i].swizzle; emit(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. */ @@ -1058,7 +1178,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, if (!reg->reladdr) return; - emit(ir, TGSI_OPCODE_ARL, address_reg, *reg->reladdr); + emit_arl(ir, address_reg, *reg->reladdr); if (*num_reladdr != 1) { st_src_reg temp = get_temp(glsl_type::vec4_type); @@ -1131,13 +1251,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); break; case ir_unop_neg: - op[0].negate = ~op[0].negate; - result_src = op[0]; + assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); + if (result_dst.type == GLSL_TYPE_INT) + emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + else { + op[0].negate = ~op[0].negate; + result_src = op[0]; + } break; case ir_unop_abs: + assert(result_dst.type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; case ir_unop_sign: @@ -1200,9 +1326,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: - assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + if (result_dst.type == GLSL_TYPE_FLOAT) + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + else + emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); + break; case ir_binop_mod: - assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + if (result_dst.type == 
GLSL_TYPE_FLOAT) + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + else + emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); break; case ir_binop_less: @@ -1227,7 +1360,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "==" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_type::vec4_type); + st_src_reg temp = get_temp(glsl_version >= 130 ? + glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::vec4_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); @@ -1239,7 +1375,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "!=" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_type::vec4_type); + st_src_reg temp = get_temp(glsl_version >= 130 ? + glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::vec4_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); @@ -1291,17 +1430,24 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_i2f: case ir_unop_b2f: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); + break; + } case ir_unop_b2i: - /* Mesa IR lacks types, ints are stored as truncated floats. */ + /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). 
*/ result_src = op[0]; break; case ir_unop_f2i: - emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + if (glsl_version >= 130) + emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); + else + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_f2b: case ir_unop_i2b: - emit(ir, TGSI_OPCODE_SNE, result_dst, - op[0], st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], + st_src_reg_for_type(result_dst.type, 0)); break; case ir_unop_trunc: emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); @@ -1329,12 +1475,40 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_bit_not: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); + break; + } case ir_unop_u2f: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); + break; + } case ir_binop_lshift: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); + break; + } case ir_binop_rshift: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); + break; + } case ir_binop_bit_and: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); + break; + } case ir_binop_bit_xor: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); + break; + } case ir_binop_bit_or: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); + break; + } case ir_unop_round_even: assert(!"GLSL 1.30 features unsupported"); break; @@ -1729,7 +1903,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) { st_src_reg src; GLfloat stack_vals[4] = { 0 }; - GLfloat *values = stack_vals; + gl_constant_value *values = (gl_constant_value *) stack_vals; + GLenum gl_type = GL_NONE; unsigned int i; /* Unfortunately, 4 floats is all we can get into @@ -1737,7 +1912,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) * aggregate constant and move each constant value into it. If we * get lucky, copy propagation will eliminate the extra moves. 
*/ - if (ir->type->base_type == GLSL_TYPE_STRUCT) { st_src_reg temp_base = get_temp(ir->type); st_dst_reg temp = st_dst_reg(temp_base); @@ -1789,13 +1963,13 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) for (i = 0; i < ir->type->matrix_columns; i++) { assert(ir->type->base_type == GLSL_TYPE_FLOAT); - values = &ir->value.f[i * ir->type->vector_elements]; + values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; - src = st_src_reg(PROGRAM_CONSTANT, -1, NULL); + src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (gl_constant_value *) values, - ir->type->vector_elements, - &src.swizzle); + values, + ir->type->vector_elements, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -1808,21 +1982,36 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src.file = PROGRAM_CONSTANT; switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: - values = &ir->value.f[0]; + gl_type = GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + values[i].f = ir->value.f[i]; + } break; case GLSL_TYPE_UINT: + gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.u[i]; + if (glsl_version >= 130) + values[i].u = ir->value.u[i]; + else + values[i].f = ir->value.u[i]; } break; case GLSL_TYPE_INT: + gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.i[i]; + if (glsl_version >= 130) + values[i].i = ir->value.i[i]; + else + values[i].f = ir->value.i[i]; } break; case GLSL_TYPE_BOOL: + gl_type = glsl_version >= 130 ? 
GL_BOOL : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.b[i]; + if (glsl_version >= 130) + values[i].b = ir->value.b[i]; + else + values[i].f = ir->value.b[i]; } break; default: @@ -1830,9 +2019,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); - this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (gl_constant_value *) values, - ir->type->vector_elements, + this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + values, ir->type->vector_elements, gl_type, &this->result.swizzle); } @@ -2535,6 +2723,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) { GLuint i; GLint outputMap[VERT_RESULT_MAX]; + GLint outputTypes[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; GLboolean usedTemps[MAX_PROGRAM_TEMPS]; GLuint firstTemp = 0; @@ -2562,6 +2751,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) outputMap[var] = _mesa_find_free_register(usedTemps, MAX_PROGRAM_TEMPS, firstTemp); + outputTypes[var] = inst->src[j].type; firstTemp = outputMap[var] + 1; } inst->src[j].file = PROGRAM_TEMPORARY; @@ -2587,8 +2777,8 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) for (i = 0; i < VERT_RESULT_MAX; i++) { if (outputMap[i] >= 0) { /* MOV VAR[i], TEMP[tmp]; */ - st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]); - st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW); + st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); + st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); dst.index = i; this->emit(NULL, TGSI_OPCODE_MOV, dst, src); } @@ -3762,10 +3952,33 @@ st_translate_program( if (program->indirect_addr_consts) t->constants[i] = ureg_DECL_constant( ureg, i ); else - t->constants[i] = - ureg_DECL_immediate( ureg, - (GLfloat *) proginfo->Parameters->ParameterValues[i], - 4 ); + switch(proginfo->Parameters->Parameters[i].DataType) + { + case 
GL_FLOAT: + case GL_FLOAT_VEC2: + case GL_FLOAT_VEC3: + case GL_FLOAT_VEC4: + t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4); + break; + case GL_INT: + case GL_INT_VEC2: + case GL_INT_VEC3: + case GL_INT_VEC4: + t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4); + break; + case GL_UNSIGNED_INT: + case GL_UNSIGNED_INT_VEC2: + case GL_UNSIGNED_INT_VEC3: + case GL_UNSIGNED_INT_VEC4: + case GL_BOOL: + case GL_BOOL_VEC2: + case GL_BOOL_VEC3: + case GL_BOOL_VEC4: + t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4); + break; + default: + assert(!"should not get here"); + } break; default: break; @@ -3874,6 +4087,7 @@ get_mesa_program(struct gl_context *ctx, v->prog = prog; v->shader_program = shader_program; v->options = options; + v->glsl_version = ctx->Const.GLSLVersion; add_uniforms_to_parameters_list(shader_program, shader, prog); From b2c067e3075414703a7ebad439d4290c27cab46a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 14 Jun 2011 17:38:14 -0500 Subject: [PATCH 154/600] glsl-to-tgsi: fix piglit tests This commit fixes all of the piglit tests regressed by "mesa, glsl_to_tgsi: add native support for integers in shaders" on softpipe. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3f5c0c60226..49613fccda7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -886,7 +886,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) int swizzle[4]; int i; - src.type = type->base_type; + src.type = glsl_version >= 130 ? 
type->base_type : GLSL_TYPE_FLOAT; src.file = PROGRAM_TEMPORARY; src.index = next_temp; src.reladdr = NULL; @@ -1632,6 +1632,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } this->result = st_src_reg(entry->file, entry->index, var->type); + if (glsl_version <= 120) + this->result.type = GLSL_TYPE_FLOAT; } void @@ -1966,10 +1968,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); - src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, - ir->type->vector_elements, - &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + values, + ir->type->vector_elements, + GL_FLOAT, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -4142,15 +4145,14 @@ get_mesa_program(struct gl_context *ctx, if (target == GL_VERTEX_PROGRAM_ARB) v->remove_output_reads(PROGRAM_VARYING); - /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ - v->copy_propagate(); - - /* FIXME: These passes to optimize temporary registers don't work when there + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. + * FIXME: These passes to optimize temporary registers don't work when there * is indirect addressing of the temporary register space. We need proper * array support so that we don't have to give up these passes in every * shader that uses arrays. 
*/ if (!v->indirect_addr_temps) { + v->copy_propagate(); v->merge_registers(); v->eliminate_dead_code(); v->renumber_registers(); From bf1cee9f24022e3da96d84fdc6baaa050d3eadf1 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 14 Jun 2011 18:17:40 -0500 Subject: [PATCH 155/600] glsl_to_tgsi: finish some loose ends --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 46 +++++++++++++++++----- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 49613fccda7..438f21483c7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2200,7 +2200,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->lod_info.grad.dPdy->accept(this); dy = this->result; break; - case ir_txf: // TODO: use TGSI_OPCODE_TXF here + case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */ assert(!"GLSL 1.30 features unsupported"); break; } @@ -3731,6 +3731,37 @@ emit_wpos(struct st_context *st, emit_wpos_inversion(t, program, invert); } +/** + * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. + * TGSI uses +1 for front, -1 for back. + * This function converts the TGSI value to the GL value. Simply clamping/ + * saturating the value to [0,1] does the job. 
+ */ +static void +emit_face_var(struct st_translate *t) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst face_temp = ureg_DECL_temporary(ureg); + struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; + + /* MOV_SAT face_temp, input[face] */ + face_temp = ureg_saturate(face_temp); + ureg_MOV(ureg, face_temp, face_input); + + /* Use face_temp as face input from here on: */ + t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); +} + +static void +emit_edgeflags(struct st_translate *t) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; + struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; + + ureg_MOV(ureg, edge_dst, edge_src); +} + /** * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. * \param program the program to translate @@ -3800,15 +3831,11 @@ st_translate_program( /* Must do this after setting up t->inputs, and before * emitting constant references, below: */ - printf("FRAG_BIT_WPOS\n"); emit_wpos(st_context(ctx), t, proginfo, ureg); } - if (proginfo->InputsRead & FRAG_BIT_FACE) { - // TODO: uncomment - printf("FRAG_BIT_FACE\n"); - //emit_face_var( t, program ); - } + if (proginfo->InputsRead & FRAG_BIT_FACE) + emit_face_var(t); /* * Declare output attributes. @@ -3875,7 +3902,6 @@ st_translate_program( /* XXX: note we are modifying the incoming shader here! Need to * do this before emitting the constant decls below, or this * will be missed. - * XXX: depends on "Parameters" field specific to Mesa IR */ unsigned pointSizeClampConst = _mesa_add_state_reference(proginfo->Parameters, @@ -3887,8 +3913,8 @@ st_translate_program( t->outputs[i] = psizregtemp; } } - /*if (passthrough_edgeflags) - emit_edgeflags( t, program ); */ // TODO: uncomment + if (passthrough_edgeflags) + emit_edgeflags(t); } /* Declare address register. 
From b30bbd7436bdb9727d3766ba9c07abd610e6dda8 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 15 Jun 2011 14:45:03 -0500 Subject: [PATCH 156/600] glsl_to_tgsi: silence compiler warning --- src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 75842286ba8..656c985d78f 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -1207,7 +1207,7 @@ st_translate_mesa_program( else t->constants[i] = ureg_DECL_immediate( ureg, - program->Parameters->ParameterValues[i], + (const float*) program->Parameters->ParameterValues[i], 4 ); break; default: From 1141c3f4c4014e3c2834db65b96a3ba7cc78744a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 15 Jun 2011 17:31:51 -0500 Subject: [PATCH 157/600] glsl: remove glsl_type::get_vec4_type() Thanks to Kenneth Graunke for pointing out that glsl_type::get_instance(base, 4, 1) is the same as glsl_type::get_vec4_type(base). The function was only used in st_glsl_to_tgsi, and this commit replaces that usage with get_instance. 
--- src/glsl/glsl_types.h | 15 --------------- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 ++-- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index eb9d501858a..1b069df74fe 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -165,21 +165,6 @@ struct glsl_type { static const glsl_type *const mat4x3_type; static const glsl_type *const mat4_type; /*@}*/ - - /** - * Get the built-in instance of the vec4 type for a specific base type - */ - static const glsl_type *get_vec4_type(glsl_base_type base_type) - { - if (base_type == GLSL_TYPE_FLOAT) - return vec4_type; - else if (base_type == GLSL_TYPE_INT) - return ivec4_type; - else if (base_type == GLSL_TYPE_UINT) - return uvec4_type; - else - return NULL; - } /** diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 438f21483c7..5fedf263090 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1361,7 +1361,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_version >= 130 ? - glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); @@ -1376,7 +1376,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_version >= 130 ? 
- glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); From 552cc48fca9b932fceb3d8fa7f9d0067f46b67c2 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 16 Jun 2011 13:42:57 -0500 Subject: [PATCH 158/600] glsl_to_tgsi: fix compile error with g++ 4.6 --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5fedf263090..6c92441a105 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -389,7 +389,7 @@ public: void *mem_ctx; }; -static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL); +static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); From 29d21417e38aed0f0710d3692df320728aef90b1 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 16 Jun 2011 18:36:16 -0500 Subject: [PATCH 159/600] glsl_to_tgsi: implement simplify_cmp pass needed by r300g --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 95 ++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6c92441a105..322bfbbf1ab 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -374,6 +374,7 @@ public: bool process_move_condition(ir_rvalue *ir); void remove_output_reads(gl_register_file type); + void simplify_cmp(void); void rename_temp_register(int index, int new_index); int get_first_temp_read(int index); @@ -2788,6 +2789,97 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) } } +/** + * Returns 
the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which + * are read from the given src in this instruction + */ +static int +get_src_arg_mask(st_dst_reg dst, st_src_reg src) +{ + int read_mask = 0, comp; + + /* Now, given the src swizzle and the written channels, find which + * components are actually read + */ + for (comp = 0; comp < 4; ++comp) { + const unsigned coord = GET_SWZ(src.swizzle, comp); + ASSERT(coord < 4); + if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) + read_mask |= 1 << coord; + } + + return read_mask; +} + +/** + * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP + * instruction is the first instruction to write to register T0. There are + * several lowering passes done in GLSL IR (e.g. branches and + * relative addressing) that create a large number of conditional assignments + * that ir_to_mesa converts to CMP instructions like the one mentioned above. + * + * Here is why this conversion is safe: + * CMP T0, T1 T2 T0 can be expanded to: + * if (T1 < 0.0) + * MOV T0, T2; + * else + * MOV T0, T0; + * + * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same + * as the original program. If (T1 < 0.0) evaluates to false, executing + * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. + * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 + * because any instruction that was going to read from T0 after this was going + * to read a garbage value anyway. + */ +void +glsl_to_tgsi_visitor::simplify_cmp(void) +{ + unsigned tempWrites[MAX_PROGRAM_TEMPS]; + unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; + + memset(tempWrites, 0, sizeof(tempWrites)); + memset(outputWrites, 0, sizeof(outputWrites)); + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + unsigned prevWriteMask = 0; + + /* Give up if we encounter relative addressing or flow control. 
*/ + if (inst->dst.reladdr || + tgsi_get_opcode_info(inst->op)->is_branch || + inst->op == TGSI_OPCODE_BGNSUB || + inst->op == TGSI_OPCODE_CONT || + inst->op == TGSI_OPCODE_END || + inst->op == TGSI_OPCODE_ENDSUB || + inst->op == TGSI_OPCODE_RET) { + return; + } + + if (inst->dst.file == PROGRAM_OUTPUT) { + assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); + prevWriteMask = outputWrites[inst->dst.index]; + outputWrites[inst->dst.index] |= inst->dst.writemask; + } else if (inst->dst.file == PROGRAM_TEMPORARY) { + assert(inst->dst.index < MAX_PROGRAM_TEMPS); + prevWriteMask = tempWrites[inst->dst.index]; + tempWrites[inst->dst.index] |= inst->dst.writemask; + } + + /* For a CMP to be considered a conditional write, the destination + * register and source register two must be the same. */ + if (inst->op == TGSI_OPCODE_CMP + && !(inst->dst.writemask & prevWriteMask) + && inst->src[2].file == inst->dst.file + && inst->src[2].index == inst->dst.index + && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { + + inst->op = TGSI_OPCODE_MOV; + inst->src[0] = inst->src[1]; + } + } +} + /* Replaces all references to a temporary register index with another index. */ void glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) @@ -4170,6 +4262,9 @@ get_mesa_program(struct gl_context *ctx, v->remove_output_reads(PROGRAM_OUTPUT); if (target == GL_VERTEX_PROGRAM_ARB) v->remove_output_reads(PROGRAM_VARYING); + + /* Perform the simplify_cmp optimization, which is required by r300g. */ + v->simplify_cmp(); /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 
* FIXME: These passes to optimize temporary registers don't work when there From 8c50f18b29637470539d05ccc32b0cae0092aeac Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 21 Jun 2011 21:52:19 +0100 Subject: [PATCH 160/600] glsl_to_tgsi: execute merge_registers() after eliminate_dead_code() Fixes a regression unintentionally introduced by "glsl_to_tgsi: fix shaders with indirect addressing of temps" that caused missing leaves in 3dmark01 test 4 (Nature) and missing/displaced textures on human models in Counter-Strike: Source. Signed-off-by: Emil Velikov Signed-off-by: Bryan Cain --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 322bfbbf1ab..abeb44a4083 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4274,8 +4274,8 @@ get_mesa_program(struct gl_context *ctx, */ if (!v->indirect_addr_temps) { v->copy_propagate(); - v->merge_registers(); v->eliminate_dead_code(); + v->merge_registers(); v->renumber_registers(); } From 8b881ad1c3d9dd3c96afbdbb608a7240d40e9c92 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 23 Jun 2011 19:35:36 -0500 Subject: [PATCH 161/600] glsl_to_tgsi: use swizzle_for_size for src reg in conditional moves This prevents the copy propagation pass from being confused by undefined channels and thus missing optimization opportunities. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index abeb44a4083..6d76686ab5d 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1882,10 +1882,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) st_src_reg condition = this->result; for (i = 0; i < type_size(ir->lhs->type); i++) { + st_src_reg l_src = st_src_reg(l); + l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); + if (switch_order) { - emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r); + emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); } else { - emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l)); + emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); } l.index++; From 7ec7dd4fb6ae6c8aa29988754476e1212eb986ef Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 23 Jun 2011 19:53:37 -0500 Subject: [PATCH 162/600] glsl_to_tgsi: remove handling of XPD opcode in compile_tgsi_instruction() The opcode is never emitted by the glsl_to_tgsi_visitor, so its special case in compile_tgsi_instruction() was dead code. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6d76686ab5d..721ba28d61f 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3653,14 +3653,6 @@ compile_tgsi_instruction(struct st_translate *t, src, num_src ); break; - case TGSI_OPCODE_XPD: - dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); - break; - default: ureg_insn( ureg, inst->op, From 41472f7809dcff114223b8fadc5b97baff6060a9 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 24 Jun 2011 18:45:04 -0500 Subject: [PATCH 163/600] glsl_to_tgsi: add a better, more advanced dead code elimination pass --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 140 +++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 721ba28d61f..d47364fabb6 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -208,6 +208,7 @@ public: int sampler; /**< sampler index */ int tex_target; /**< One of TEXTURE_*_INDEX */ GLboolean tex_shadow; + int dead_mask; /**< Used in dead code elimination */ class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ }; @@ -384,6 +385,7 @@ public: void copy_propagate(void); void eliminate_dead_code(void); + int eliminate_dead_code_advanced(void); void merge_registers(void); void renumber_registers(void); @@ -480,6 +482,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, inst->src[1] = src1; inst->src[2] = src2; inst->ir = ir; + inst->dead_mask = 0; inst->function = NULL; @@ -3257,6 +3260,142 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) } } +/* + * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead + * code 
elimination. This is less primitive than eliminate_dead_code(), as it + * is per-channel and can detect consecutive writes without a read between them + * as dead code. However, there is some dead code that can be eliminated by + * eliminate_dead_code() but not this function - for example, this function + * cannot eliminate an instruction writing to a register that is never read and + * is the only instruction writing to that register. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. + */ +int +glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) +{ + glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, + glsl_to_tgsi_instruction *, + this->next_temp * 4); + int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + int level = 0; + int removed = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + assert(inst->dst.file != PROGRAM_TEMPORARY + || inst->dst.index < this->next_temp); + + switch (inst->op) { + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: + /* End of a basic block, clear the write array entirely. + * FIXME: This keeps us from killing dead code when the writes are + * on either side of a loop, even when the register isn't touched + * inside the loop. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + break; + + case TGSI_OPCODE_IF: + ++level; + break; + + case TGSI_OPCODE_ENDIF: + --level; + break; + + case TGSI_OPCODE_ELSE: + /* Clear all channels written inside the preceding if block from the + * write array, but leave those that were not touched. + * + * FIXME: This destroys opportunities to remove dead code inside of + * IF blocks that are followed by an ELSE block. 
+ */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!writes[4 * r + c]) + continue; + + if (write_level[4 * r + c] >= level) + writes[4 * r + c] = NULL; + } + } + break; + + default: + /* Continuing the block, clear any channels from the write array that + * are read by this instruction. + */ + for (int i = 0; i < 4; i++) { + if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ + /* Any temporary might be read, so no dead code elimination + * across this instruction. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + } else if (inst->src[i].file == PROGRAM_TEMPORARY) { + /* Clear where it's used as src. */ + int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); + + for (int c = 0; c < 4; c++) { + if (src_chans & (1 << c)) { + writes[4 * inst->src[i].index + c] = NULL; + } + } + } + } + break; + } + + /* If this instruction writes to a temporary, add it to the write array. + * If there is already an instruction in the write array for one or more + * of the channels, flag that channel write as dead. + */ + if (inst->dst.file == PROGRAM_TEMPORARY && + !inst->dst.reladdr && + !inst->saturate) { + for (int c = 0; c < 4; c++) { + if (inst->dst.writemask & (1 << c)) { + if (writes[4 * inst->dst.index + c]) { + if (write_level[4 * inst->dst.index + c] < level) + continue; + else + writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); + } + writes[4 * inst->dst.index + c] = inst; + write_level[4 * inst->dst.index + c] = level; + } + } + } + } + + /* Now actually remove the instructions that are completely dead and update + * the writemask of other instructions with dead channels. 
+ */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (!inst->dead_mask || !inst->dst.writemask) + continue; + else if (inst->dead_mask == inst->dst.writemask) { + iter.remove(); + delete inst; + removed++; + } else + inst->dst.writemask &= ~(inst->dead_mask); + } + + ralloc_free(write_level); + ralloc_free(writes); + + return removed; +} + /* Merges temporary registers together where possible to reduce the number of * registers needed to run a program. * @@ -4269,6 +4408,7 @@ get_mesa_program(struct gl_context *ctx, */ if (!v->indirect_addr_temps) { v->copy_propagate(); + while (v->eliminate_dead_code_advanced()); v->eliminate_dead_code(); v->merge_registers(); v->renumber_registers(); From 194732fd7299481dd57815f46a594d155260ce17 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 24 Jun 2011 20:37:53 -0500 Subject: [PATCH 164/600] glsl_to_tgsi: use a more specific condition for gl_FragDepth hack in generating assignments This reduces the number of instructions in the fragment shader of glsl-fs-atan-2 from 174 to 146 with EmitNoIfs enabled. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d47364fabb6..5f22f7091d6 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1841,7 +1841,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) if (ir->write_mask == 0) { assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); l.writemask = WRITEMASK_XYZW; - } else if (ir->lhs->type->is_scalar()) { + } else if (ir->lhs->type->is_scalar() && + ir->lhs->variable_referenced()->mode == ir_var_out) { /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the * FINISHME: W component of fragment shader output zero, work correctly. 
*/ @@ -1851,7 +1852,6 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) int first_enabled_chan = 0; int rhs_chan = 0; - assert(ir->lhs->type->is_vector()); l.writemask = ir->write_mask; for (int i = 0; i < 4; i++) { From 3bd06e5b82b438041f50e2469be9ea68bf3b4300 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 24 Jun 2011 22:32:26 -0500 Subject: [PATCH 165/600] glsl_to_tgsi: use the correct writemask in try_emit_mad() and try_emit_sat() --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5f22f7091d6..13573fc1b94 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1133,6 +1133,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) { int nonmul_operand = 1 - mul_operand; st_src_reg a, b, c; + st_dst_reg result_dst; ir_expression *expr = ir->operands[mul_operand]->as_expression(); if (!expr || expr->operation != ir_binop_mul) @@ -1146,7 +1147,9 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) c = this->result; this->result = get_temp(ir->type); - emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c); + result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); return true; } @@ -1168,8 +1171,10 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) st_src_reg src = this->result; this->result = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src); + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); inst->saturate = true; return true; From 71cbc9e3c4c9ef6090ee31e87601ae64af26321e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: 
Fri, 24 Jun 2011 23:17:30 -0500 Subject: [PATCH 166/600] glsl_to_tgsi: improve eliminate_dead_code_advanced() --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 13573fc1b94..15a1a3c51c4 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3379,6 +3379,15 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) } } + /* Anything still in the write array at this point is dead code. */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + glsl_to_tgsi_instruction *inst = writes[4 * r + c]; + if (inst) + inst->dead_mask |= (1 << c); + } + } + /* Now actually remove the instructions that are completely dead and update * the writemask of other instructions with dead channels. */ From f00406b68c07f97b11e873c04917cafdb1a67462 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 27 Jun 2011 17:11:07 -0500 Subject: [PATCH 167/600] glsl_to_tgsi: improve assignment handling This is a hack, but it's better than emitting an unnecessary MOV instruction and hoping the optimization passes clean it up. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 15a1a3c51c4..e38617ae9fe 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -695,13 +695,13 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, st_src_reg tmp = get_temp(glsl_type::float_type); if (src0.type == GLSL_TYPE_INT) - emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); + emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); else if (src0.type == GLSL_TYPE_UINT) - emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); + emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); else tmp = src0; - emit(ir, TGSI_OPCODE_ARL, dst, tmp); + emit(NULL, TGSI_OPCODE_ARL, dst, tmp); } /** @@ -1902,6 +1902,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) l.index++; r.index++; } + } else if (ir->rhs->as_expression() && + this->instructions.get_tail() && + ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && + type_size(ir->lhs->type) == 1) { + /* To avoid emitting an extra MOV when assigning an expression to a + * variable, change the destination register of the last instruction + * emitted as part of the expression to the assignment variable. + */ + glsl_to_tgsi_instruction *inst; + inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + inst->dst = l; } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); From 4c8b6a286887628e5fc35306189a4c4a83c482ea Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 27 Jun 2011 17:25:50 -0500 Subject: [PATCH 168/600] glsl_to_tgsi: fix mistake in new dead code elimination pass The conditions of IF opcodes were not being counted as reads, which sometimes led to the condition register being wrong or undefined. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e38617ae9fe..f87c64f62c7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3315,10 +3315,6 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) memset(writes, 0, sizeof(*writes) * this->next_temp * 4); break; - case TGSI_OPCODE_IF: - ++level; - break; - case TGSI_OPCODE_ENDIF: --level; break; @@ -3341,6 +3337,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) } break; + case TGSI_OPCODE_IF: + ++level; + /* fallthrough to default case to mark the condition as read */ + default: /* Continuing the block, clear any channels from the write array that * are read by this instruction. From 9c2810103d107d1e5ef8bd8b57819d12264f664a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 27 Jun 2011 17:40:10 -0500 Subject: [PATCH 169/600] glsl_to_tgsi: always run copy_propagate() and eliminate_dead_code_advanced() These two passes are written to handle indirect addressing properly. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f87c64f62c7..e7d0af83a6b 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4422,18 +4422,17 @@ get_mesa_program(struct gl_context *ctx, if (target == GL_VERTEX_PROGRAM_ARB) v->remove_output_reads(PROGRAM_VARYING); - /* Perform the simplify_cmp optimization, which is required by r300g. */ + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->simplify_cmp(); + v->copy_propagate(); + while (v->eliminate_dead_code_advanced()); - /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 
- * FIXME: These passes to optimize temporary registers don't work when there + /* FIXME: These passes to optimize temporary registers don't work when there * is indirect addressing of the temporary register space. We need proper * array support so that we don't have to give up these passes in every * shader that uses arrays. */ if (!v->indirect_addr_temps) { - v->copy_propagate(); - while (v->eliminate_dead_code_advanced()); v->eliminate_dead_code(); v->merge_registers(); v->renumber_registers(); From 54db6e618e43abbd69b59e0a03e2b6ec83d3120f Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 30 Jun 2011 13:42:37 -0500 Subject: [PATCH 170/600] r200, r600c, i965: fix build --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vs_emit.c | 2 +- src/mesa/drivers/dri/r200/r200_vertprog.c | 8 ++++---- src/mesa/drivers/dri/r600/evergreen_fragprog.c | 8 ++++---- src/mesa/drivers/dri/r600/evergreen_vertprog.c | 16 ++++++++-------- src/mesa/drivers/dri/r600/r700_fragprog.c | 8 ++++---- src/mesa/drivers/dri/r600/r700_vertprog.c | 16 ++++++++-------- 7 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7c73a8fbf02..31f76f8c939 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -605,7 +605,7 @@ fs_visitor::setup_paramvalues_refs() /* Set up the pointers to ParamValues now that that array is finalized. 
*/ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { c->prog_data.param[i] = - fp->Base.Parameters->ParameterValues[this->param_index[i]] + + (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] + this->param_offset[i]; } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b6c9e5a1ceb..2fa04a15a34 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1359,7 +1359,7 @@ get_src_reg( struct brw_vs_compile *c, if (component >= 0) { params = c->vp->program.Base.Parameters; - f = params->ParameterValues[src->Index][component]; + f = params->ParameterValues[src->Index][component].f; if (src->Abs) f = fabs(f); diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 63e03b0e0c7..cf44d7f459c 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -126,10 +126,10 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_ case PROGRAM_NAMED_PARAM: //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); case PROGRAM_CONSTANT: - *fcmd++ = paramList->ParameterValues[pi][0]; - *fcmd++ = paramList->ParameterValues[pi][1]; - *fcmd++ = paramList->ParameterValues[pi][2]; - *fcmd++ = paramList->ParameterValues[pi][3]; + *fcmd++ = paramList->ParameterValues[pi][0].f; + *fcmd++ = paramList->ParameterValues[pi][1].f; + *fcmd++ = paramList->ParameterValues[pi][2].f; + *fcmd++ = paramList->ParameterValues[pi][3].f; break; default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c index e527c379b62..cc584ca2b35 100644 --- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c @@ -752,10 +752,10 @@ GLboolean evergreenSetupFPconstants(struct gl_context * ctx) unNumParamData = 
paramList->NumParameters; for(ui=0; uips.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* alloc multiple of 16 constants */ diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c index 018869b9996..117916ac78f 100644 --- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c @@ -684,17 +684,17 @@ GLboolean evergreenSetupVPconstants(struct gl_context * ctx) for(ui=0; uiParameters[ui].Type == PROGRAM_UNIFORM) { - evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All = 
paramList->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 40494cd6af0..6f9834e68fe 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -778,10 +778,10 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) unNumParamData = paramList->NumParameters; for(ui=0; uips.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* Load fp constants to gpu */ diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 7d4be9180a0..b1e2742b27d 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -720,17 +720,17 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx) for(ui=0; uiParameters[ui].Type == PROGRAM_UNIFORM) { - r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + 
r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } From 33e0c47b05c8fbae9d7af57ba65b612825b5db60 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 4 Jul 2011 08:44:12 -0500 Subject: [PATCH 171/600] glsl_to_tgsi: replace MAX_PROGRAM_TEMPS (256) with MAX_TEMPS (4096) --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e7d0af83a6b..d7afc22c048 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -77,6 +77,8 @@ extern "C" { (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) +#define MAX_TEMPS 4096 + class st_src_reg; class st_dst_reg; @@ -2751,11 +2753,11 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) GLint outputMap[VERT_RESULT_MAX]; GLint outputTypes[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; - GLboolean usedTemps[MAX_PROGRAM_TEMPS]; + GLboolean usedTemps[MAX_TEMPS]; GLuint firstTemp = 0; _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, - usedTemps, MAX_PROGRAM_TEMPS); + usedTemps, MAX_TEMPS); 
assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); @@ -2775,7 +2777,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) if (outputMap[var] == -1) { numVaryingReads++; outputMap[var] = _mesa_find_free_register(usedTemps, - MAX_PROGRAM_TEMPS, + MAX_TEMPS, firstTemp); outputTypes[var] = inst->src[j].type; firstTemp = outputMap[var] + 1; @@ -2857,7 +2859,7 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src) void glsl_to_tgsi_visitor::simplify_cmp(void) { - unsigned tempWrites[MAX_PROGRAM_TEMPS]; + unsigned tempWrites[MAX_TEMPS]; unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; memset(tempWrites, 0, sizeof(tempWrites)); @@ -2883,7 +2885,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) prevWriteMask = outputWrites[inst->dst.index]; outputWrites[inst->dst.index] |= inst->dst.writemask; } else if (inst->dst.file == PROGRAM_TEMPORARY) { - assert(inst->dst.index < MAX_PROGRAM_TEMPS); + assert(inst->dst.index < MAX_TEMPS); prevWriteMask = tempWrites[inst->dst.index]; tempWrites[inst->dst.index] |= inst->dst.writemask; } @@ -3504,7 +3506,7 @@ struct label { struct st_translate { struct ureg_program *ureg; - struct ureg_dst temps[MAX_PROGRAM_TEMPS]; + struct ureg_dst temps[MAX_TEMPS]; struct ureg_src *constants; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; From c0dcab2882a4731dccd363a40c3ebcabc88b9c5d Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 8 Jul 2011 21:12:08 -0500 Subject: [PATCH 172/600] st/mesa, glsl_to_tgsi: support glDrawPixels/glCopyPixels with a GLSL fragment shader active Since this was previously implemented using Mesa IR and _mesa_combine_programs, this commit adds a new code path that works with glsl_to_tgsi. 
--- src/mesa/state_tracker/st_cb_drawpixels.c | 65 +++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 126 +++++++++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.h | 3 + 3 files changed, 194 insertions(+) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 965fbcd1d9e..f4dd2a42847 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -94,6 +94,67 @@ is_passthrough_program(const struct gl_fragment_program *prog) } +/* XXX copied verbatim from st_atom_pixeltransfer.c */ +static struct pipe_resource * +create_color_map_texture(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_resource *pt; + enum pipe_format format; + const uint texSize = 256; /* simple, and usually perfect */ + + /* find an RGBA texture format */ + format = st_choose_format(pipe->screen, GL_RGBA, + PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); + + /* create texture for color map/table */ + pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, + texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); + return pt; +} + + +/** + * Returns a fragment program which implements the current pixel transfer ops. 
+ */ +static struct gl_fragment_program * +get_glsl_pixel_transfer_program(struct st_context *st, + struct st_fragment_program *orig) +{ + int pixelMaps = 0, scaleAndBias = 0; + struct gl_context *ctx = st->ctx; + struct st_fragment_program *fp = (struct st_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + if (!fp) + return NULL; + + if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 || + ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 || + ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 || + ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) { + scaleAndBias = 1; + } + + pixelMaps = ctx->Pixel.MapColorFlag; + + if (pixelMaps) { + /* create the colormap/texture now if not already done */ + if (!st->pixel_xfer.pixelmap_texture) { + st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_sampler_view = + st_create_texture_sampler_view(st->pipe, + st->pixel_xfer.pixelmap_texture); + } + } + + get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi, + scaleAndBias, pixelMaps); + + return &fp->Base; +} + /** * Make fragment shader for glDraw/CopyPixels. 
This shader is made @@ -107,11 +168,15 @@ st_make_drawpix_fragment_program(struct st_context *st, struct gl_fragment_program **fpOut) { struct gl_program *newProg; + struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn; if (is_passthrough_program(fpIn)) { newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx, &st->pixel_xfer.program->Base); } + else if (stfp->glsl_to_tgsi != NULL) { + newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp); + } else { #if 0 /* debug */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d7afc22c048..ae0c92f5f13 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3494,6 +3494,132 @@ glsl_to_tgsi_visitor::renumber_registers(void) this->next_temp = new_index; } +/** + * Returns a fragment program which implements the current pixel transfer ops. + * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. + */ +extern "C" void +get_pixel_transfer_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + struct gl_program_parameter_list *params = _mesa_new_parameter_list(); + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. 
*/ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + + /* + * Get initial pixel color from the texture. + * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; + */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = 0; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR); + prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ + v->samplers_used |= (1 << 0); + + if (scale_and_bias) { + static const gl_state_index scale_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_SCALE, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + static const gl_state_index bias_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_BIAS, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + GLint scale_p, bias_p; + st_src_reg scale, bias; + + scale_p = _mesa_add_state_reference(params, scale_state); + bias_p = _mesa_add_state_reference(params, bias_state); + + /* MAD colorTemp, colorTemp, scale, bias; */ + scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); + bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); + inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); + } + + if (pixel_maps) { + st_src_reg temp = v->get_temp(glsl_type::vec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + + assert(st->pixel_xfer.pixelmap_texture); + + /* With a little effort, we can do four pixel map look-ups with + * two TEX 
instructions: + */ + + /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ + temp_dst.writemask = WRITEMASK_XY; /* write R,G */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ + src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); + temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ + v->samplers_used |= (1 << 1); + + /* MOV colorTemp, temp; */ + inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); + } + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. */ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT && + src_regs[i].index == FRAG_ATTRIB_COL0) + { + src_regs[i].file = PROGRAM_TEMPORARY; + src_regs[i].index = src0.index; + } + else if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + else if (src_regs[i].file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. 
*/ + prog->Parameters = _mesa_combine_parameter_lists(params, + original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + _mesa_free_parameter_list(params); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + /* ------------------------- TGSI conversion stuff -------------------------- */ struct label { unsigned branch_target; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index e21c0d1e0af..7884a9feb71 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -52,6 +52,9 @@ enum pipe_error st_translate_program( boolean passthrough_edgeflags); void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); +void get_pixel_transfer_visitor(struct st_fragment_program *fp, + struct glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps); struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); From 5f0b4b0e9d376f9ec1cb5ae08c36052f4f51ac37 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 10 Jul 2011 17:17:38 -0500 Subject: [PATCH 173/600] st/mesa, glsl_to_tgsi: support glBitmap with a GLSL fragment shader active --- src/mesa/state_tracker/st_cb_bitmap.c | 35 +++++++++-- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 73 ++++++++++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.h | 3 + 3 files changed, 105 insertions(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 49b196032b9..f0750b518ad 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -172,6 +172,23 @@ make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex) } +static struct gl_program * +make_bitmap_fragment_program_glsl(struct st_context *st, + struct st_fragment_program *orig, + GLuint samplerIndex) +{ + struct gl_context 
*ctx = st->ctx; + struct st_fragment_program *fp = (struct st_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + if (!fp) + return NULL; + + get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex); + return &fp->Base.Base; +} + + static int find_free_bit(uint bitfield) { @@ -199,6 +216,7 @@ st_make_bitmap_fragment_program(struct st_context *st, GLuint *bitmap_sampler) { struct st_fragment_program *bitmap_prog; + struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn; struct gl_program *newProg; uint sampler; @@ -207,13 +225,18 @@ st_make_bitmap_fragment_program(struct st_context *st, * with the bitmap sampler/kill instructions. */ sampler = find_free_bit(fpIn->Base.SamplersUsed); - bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); + + if (stfpIn->glsl_to_tgsi) + newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler); + else { + bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); - newProg = _mesa_combine_programs(st->ctx, - &bitmap_prog->Base.Base, - &fpIn->Base); - /* done with this after combining */ - st_reference_fragprog(st, &bitmap_prog, NULL); + newProg = _mesa_combine_programs(st->ctx, + &bitmap_prog->Base.Base, + &fpIn->Base); + /* done with this after combining */ + st_reference_fragprog(st, &bitmap_prog, NULL); + } #if 0 { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ae0c92f5f13..74f15087947 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3620,6 +3620,79 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, fp->glsl_to_tgsi = v; } +/** + * Make fragment program for glBitmap: + * Sample the texture and kill the fragment if the bit is 0. + * This program will be combined with the user's fragment program. + * + * Based on make_bitmap_fragment_program in st_cb_bitmap.c. 
+ */ +extern "C" void +get_bitmap_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, int samplerIndex) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + + /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = samplerIndex; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ + v->samplers_used |= (1 << samplerIndex); + + /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ + src0.negate = NEGATE_XYZW; + if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) + src0.swizzle = SWIZZLE_XXXX; + inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. 
*/ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. */ + prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + /* ------------------------- TGSI conversion stuff -------------------------- */ struct label { unsigned branch_target; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index 7884a9feb71..d877471785d 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -55,6 +55,9 @@ void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); void get_pixel_transfer_visitor(struct st_fragment_program *fp, struct glsl_to_tgsi_visitor *original, int scale_and_bias, int pixel_maps); +void get_bitmap_visitor(struct st_fragment_program *fp, + struct glsl_to_tgsi_visitor *original, + int samplerIndex); struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); From 87f8d8547db9b947ae847c509a464e06d0ac6c64 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 10 Jul 2011 17:36:04 -0500 Subject: [PATCH 174/600] glsl_to_tgsi: fix mistakes in get_pixel_transfer_visitor() I noticed these issues while working on get_bitmap_visitor(). 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 74f15087947..3df22eae918 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3534,7 +3534,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, inst->tex_target = TEXTURE_2D_INDEX; prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); - prog->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR); prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ v->samplers_used |= (1 << 0); @@ -3593,6 +3592,9 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); st_src_reg src_regs[3]; + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + for (int i=0; i<3; i++) { src_regs[i] = inst->src[i]; if (src_regs[i].file == PROGRAM_INPUT && @@ -3603,8 +3605,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, } else if (src_regs[i].file == PROGRAM_INPUT) prog->InputsRead |= (1 << src_regs[i].index); - else if (src_regs[i].file == PROGRAM_OUTPUT) - prog->OutputsWritten |= BITFIELD64_BIT(src_regs[i].index); } v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); From 7732822c833ee22e259af3f8bd2bfb57c986612e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 21 Jul 2011 15:49:26 -0500 Subject: [PATCH 175/600] glsl_to_tgsi: separate immediates from array constants during IR translation Before, if any uniform or constant array was accessed with indirect addressing, st_translate_program() would emit uniform constants in the place of immediates. This behavior was unavoidable with ir_to_mesa/mesa_to_tgsi, but glsl_to_tgsi can work around it since the GLSL IR backend and the TGSI emission are both inside the state tracker. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 143 ++++++++++++++------- 1 file changed, 95 insertions(+), 48 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3df22eae918..389e5d8e2ef 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -70,6 +70,7 @@ extern "C" { #include "st_mesa_to_tgsi.h" } +#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ (1 << PROGRAM_ENV_PARAM) | \ (1 << PROGRAM_STATE_VAR) | \ @@ -272,6 +273,7 @@ public: struct gl_program *prog; struct gl_shader_program *shader_program; struct gl_shader_compiler_options *options; + struct gl_program_parameter_list *immediates; int next_temp; @@ -505,6 +507,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, case PROGRAM_UNIFORM: this->indirect_addr_consts = true; break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; default: break; } @@ -524,6 +529,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, case PROGRAM_UNIFORM: this->indirect_addr_consts = true; break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; default: break; } @@ -804,12 +812,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { - st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT); + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - &uval, 1, GL_FLOAT, &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, + GL_FLOAT, &src.swizzle); return src; } @@ -817,14 +825,14 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val) struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_int(int val) { - 
st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT); + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); union gl_constant_value uval; assert(glsl_version >= 130); uval.i = val; - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - &uval, 1, GL_INT, &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, + GL_INT, &src.swizzle); return src; } @@ -1933,9 +1941,15 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) gl_constant_value *values = (gl_constant_value *) stack_vals; GLenum gl_type = GL_NONE; unsigned int i; + gl_register_file file; + gl_program_parameter_list *param_list; + static int in_array = 0; + + file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; + param_list = in_array ? this->prog->Parameters : this->immediates; /* Unfortunately, 4 floats is all we can get into - * _mesa_add_unnamed_constant. So, make a temp to store an + * _mesa_add_typed_unnamed_constant. So, make a temp to store an * aggregate constant and move each constant value into it. If we * get lucky, copy propagation will eliminate the extra moves. 
*/ @@ -1969,6 +1983,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) int size = type_size(ir->type->fields.array); assert(size > 0); + in_array++; for (i = 0; i < ir->type->length; i++) { ir->array_elements[i]->accept(this); @@ -1981,6 +1996,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } } this->result = temp_base; + in_array--; return; } @@ -1992,8 +2008,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) assert(ir->type->base_type == GLSL_TYPE_FLOAT); values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; - src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + src = st_src_reg(file, -1, ir->type->base_type); + src.index = _mesa_add_typed_unnamed_constant(param_list, values, ir->type->vector_elements, GL_FLOAT, @@ -2007,7 +2023,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) return; } - src.file = PROGRAM_CONSTANT; switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: gl_type = GL_FLOAT; @@ -2046,8 +2061,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) assert(!"Non-float/uint/int/bool constant"); } - this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); - this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + this->result = st_src_reg(file, -1, ir->type); + this->result.index = _mesa_add_typed_unnamed_constant(param_list, values, ir->type->vector_elements, gl_type, &this->result.swizzle); } @@ -2430,11 +2445,13 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() num_address_regs = 0; indirect_addr_temps = false; indirect_addr_consts = false; + immediates = _mesa_new_parameter_list(); mem_ctx = ralloc_context(NULL); } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { + _mesa_free_parameter_list(immediates); ralloc_free(mem_ctx); } @@ -3521,6 +3538,8 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; 
v->indirect_addr_consts = original->indirect_addr_consts; + _mesa_free_parameter_list(v->immediates); + v->immediates = _mesa_clone_parameter_list(original->immediates); /* * Get initial pixel color from the texture. @@ -3648,6 +3667,8 @@ get_bitmap_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; + _mesa_free_parameter_list(v->immediates); + v->immediates = _mesa_clone_parameter_list(original->immediates); /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); @@ -3707,6 +3728,7 @@ struct st_translate { struct ureg_dst temps[MAX_TEMPS]; struct ureg_src *constants; + struct ureg_src *immediates; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; struct ureg_dst address[1]; @@ -3797,6 +3819,43 @@ static void set_insn_start( struct st_translate *t, t->insn[t->insn_count++] = start; } +/** + * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 
+ */ +static struct ureg_src +emit_immediate( struct st_translate *t, + struct gl_program_parameter_list *params, + int index) +{ + struct ureg_program *ureg = t->ureg; + + switch(params->Parameters[index].DataType) + { + case GL_FLOAT: + case GL_FLOAT_VEC2: + case GL_FLOAT_VEC3: + case GL_FLOAT_VEC4: + return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4); + case GL_INT: + case GL_INT_VEC2: + case GL_INT_VEC3: + case GL_INT_VEC4: + return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4); + case GL_UNSIGNED_INT: + case GL_UNSIGNED_INT_VEC2: + case GL_UNSIGNED_INT_VEC3: + case GL_UNSIGNED_INT_VEC4: + case GL_BOOL: + case GL_BOOL_VEC2: + case GL_BOOL_VEC3: + case GL_BOOL_VEC4: + return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4); + default: + assert(!"should not get here - type must be float, int, uint, or bool"); + return ureg_src_undef(); + } +} + /** * Map a Mesa dst register to a TGSI ureg_dst register. */ @@ -3871,6 +3930,9 @@ src_register( struct st_translate *t, else return t->constants[index]; + case PROGRAM_IMMEDIATE: + return t->immediates[index]; + case PROGRAM_INPUT: assert(t->inputMapping[index] < Elements(t->inputs)); return t->inputs[t->inputMapping[index]]; @@ -4402,9 +4464,8 @@ st_translate_program( } } - /* Emit constants and immediates. Mesa uses a single index space - * for these, so we put all the translated regs in t->constants. - * XXX: this entire if block depends on proginfo->Parameters from Mesa IR + /* Emit constants and uniforms. TGSI uses a single index space for these, + * so we put all the translated regs in t->constants. */ if (proginfo->Parameters) { t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); @@ -4423,49 +4484,34 @@ st_translate_program( t->constants[i] = ureg_DECL_constant( ureg, i ); break; - /* Emit immediates only when there's no indirect addressing of - * the const buffer. 
- * FIXME: Be smarter and recognize param arrays: - * indirect addressing is only valid within the referenced - * array. - */ + /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect + * addressing of the const buffer. + * FIXME: Be smarter and recognize param arrays: + * indirect addressing is only valid within the referenced + * array. + */ case PROGRAM_CONSTANT: if (program->indirect_addr_consts) t->constants[i] = ureg_DECL_constant( ureg, i ); else - switch(proginfo->Parameters->Parameters[i].DataType) - { - case GL_FLOAT: - case GL_FLOAT_VEC2: - case GL_FLOAT_VEC3: - case GL_FLOAT_VEC4: - t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4); - break; - case GL_INT: - case GL_INT_VEC2: - case GL_INT_VEC3: - case GL_INT_VEC4: - t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4); - break; - case GL_UNSIGNED_INT: - case GL_UNSIGNED_INT_VEC2: - case GL_UNSIGNED_INT_VEC3: - case GL_UNSIGNED_INT_VEC4: - case GL_BOOL: - case GL_BOOL_VEC2: - case GL_BOOL_VEC3: - case GL_BOOL_VEC4: - t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4); - break; - default: - assert(!"should not get here"); - } + t->constants[i] = emit_immediate( t, proginfo->Parameters, i ); break; default: break; } } } + + /* Emit immediate values. 
+ */ + t->immediates = (struct ureg_src *)CALLOC( program->immediates->NumParameters * sizeof(struct ureg_src) ); + if (t->immediates == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + for (i = 0; i < program->immediates->NumParameters; i++) { + t->immediates[i] = emit_immediate( t, program->immediates, i ); + } /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { @@ -4512,6 +4558,7 @@ out: FREE(t->insn); FREE(t->labels); FREE(t->constants); + FREE(t->immediates); if (t->error) { debug_printf("%s: translate error flag set\n", __FUNCTION__); From 0da994a9f15b461d16cf88ce16dc07e98dfada6f Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 21 Jul 2011 16:29:56 -0500 Subject: [PATCH 176/600] glsl_to_tgsi: make assignment hack safer Fixes an assertion failure in piglit test glsl-texcoord-array. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 389e5d8e2ef..6e01a44a733 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1917,12 +1917,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && type_size(ir->lhs->type) == 1) { /* To avoid emitting an extra MOV when assigning an expression to a - * variable, change the destination register of the last instruction - * emitted as part of the expression to the assignment variable. + * variable, emit the last instruction of the expression again, but + * replace the destination register with the target of the assignment. + * Dead code elimination will remove the original instruction. 
*/ glsl_to_tgsi_instruction *inst; inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - inst->dst = l; + emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); From a2c3b9f38d81f363bd62abc87dc3abef2beeba95 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 22 Jul 2011 13:23:26 -0500 Subject: [PATCH 177/600] glsl_to_tgsi: make coding style more consistent --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 263 ++++++++++----------- 1 file changed, 126 insertions(+), 137 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6e01a44a733..952900a1fb5 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3778,15 +3778,14 @@ static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { * of labels built here and patch the TGSI code with the actual * location of each label. */ -static unsigned *get_label( struct st_translate *t, - unsigned branch_target ) +static unsigned *get_label(struct st_translate *t, unsigned branch_target) { unsigned i; if (t->labels_count + 1 >= t->labels_size) { t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); t->labels = (struct label *)realloc(t->labels, - t->labels_size * sizeof t->labels[0]); + t->labels_size * sizeof(struct label)); if (t->labels == NULL) { static unsigned dummy; t->error = TRUE; @@ -3805,12 +3804,11 @@ static unsigned *get_label( struct st_translate *t, * Update the insn[] array so the next Mesa instruction points to * the next TGSI instruction. 
*/ -static void set_insn_start( struct st_translate *t, - unsigned start ) +static void set_insn_start(struct st_translate *t, unsigned start) { if (t->insn_count + 1 >= t->insn_size) { t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); - t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]); + t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); if (t->insn == NULL) { t->error = TRUE; return; @@ -3824,9 +3822,9 @@ static void set_insn_start( struct st_translate *t, * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. */ static struct ureg_src -emit_immediate( struct st_translate *t, - struct gl_program_parameter_list *params, - int index) +emit_immediate(struct st_translate *t, + struct gl_program_parameter_list *params, + int index) { struct ureg_program *ureg = t->ureg; @@ -3861,17 +3859,17 @@ emit_immediate( struct st_translate *t, * Map a Mesa dst register to a TGSI ureg_dst register. */ static struct ureg_dst -dst_register( struct st_translate *t, - gl_register_file file, - GLuint index ) +dst_register(struct st_translate *t, + gl_register_file file, + GLuint index) { - switch( file ) { + switch(file) { case PROGRAM_UNDEFINED: return ureg_dst_undef(); case PROGRAM_TEMPORARY: if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary( t->ureg ); + t->temps[index] = ureg_DECL_temporary(t->ureg); return t->temps[index]; @@ -3894,7 +3892,7 @@ dst_register( struct st_translate *t, return t->address[index]; default: - debug_assert( 0 ); + assert(!"unknown dst register file"); return ureg_dst_undef(); } } @@ -3903,11 +3901,11 @@ dst_register( struct st_translate *t, * Map a Mesa src register to a TGSI ureg_src register. 
*/ static struct ureg_src -src_register( struct st_translate *t, - gl_register_file file, - GLuint index ) +src_register(struct st_translate *t, + gl_register_file file, + GLuint index) { - switch( file ) { + switch(file) { case PROGRAM_UNDEFINED: return ureg_src_undef(); @@ -3915,7 +3913,7 @@ src_register( struct st_translate *t, assert(index >= 0); assert(index < Elements(t->temps)); if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary( t->ureg ); + t->temps[index] = ureg_DECL_temporary(t->ureg); return ureg_src(t->temps[index]); case PROGRAM_NAMED_PARAM: @@ -3927,7 +3925,7 @@ src_register( struct st_translate *t, case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: /* ie, immediate */ if (index < 0) - return ureg_DECL_constant( t->ureg, 0 ); + return ureg_DECL_constant(t->ureg, 0); else return t->constants[index]; @@ -3950,7 +3948,7 @@ src_register( struct st_translate *t, return t->systemValues[index]; default: - debug_assert( 0 ); + assert(!"unknown src register file"); return ureg_src_undef(); } } @@ -3959,22 +3957,21 @@ src_register( struct st_translate *t, * Create a TGSI ureg_dst register from an st_dst_reg. */ static struct ureg_dst -translate_dst( struct st_translate *t, - const st_dst_reg *dst_reg, - boolean saturate ) +translate_dst(struct st_translate *t, + const st_dst_reg *dst_reg, + bool saturate) { - struct ureg_dst dst = dst_register( t, - dst_reg->file, - dst_reg->index ); + struct ureg_dst dst = dst_register(t, + dst_reg->file, + dst_reg->index); - dst = ureg_writemask( dst, - dst_reg->writemask ); + dst = ureg_writemask(dst, dst_reg->writemask); if (saturate) - dst = ureg_saturate( dst ); + dst = ureg_saturate(dst); if (dst_reg->reladdr != NULL) - dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); + dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); return dst; } @@ -3983,16 +3980,15 @@ translate_dst( struct st_translate *t, * Create a TGSI ureg_src register from an st_src_reg. 
*/ static struct ureg_src -translate_src( struct st_translate *t, - const st_src_reg *src_reg ) +translate_src(struct st_translate *t, const st_src_reg *src_reg) { - struct ureg_src src = src_register( t, src_reg->file, src_reg->index ); + struct ureg_src src = src_register(t, src_reg->file, src_reg->index); - src = ureg_swizzle( src, - GET_SWZ( src_reg->swizzle, 0 ) & 0x3, - GET_SWZ( src_reg->swizzle, 1 ) & 0x3, - GET_SWZ( src_reg->swizzle, 2 ) & 0x3, - GET_SWZ( src_reg->swizzle, 3 ) & 0x3); + src = ureg_swizzle(src, + GET_SWZ(src_reg->swizzle, 0) & 0x3, + GET_SWZ(src_reg->swizzle, 1) & 0x3, + GET_SWZ(src_reg->swizzle, 2) & 0x3, + GET_SWZ(src_reg->swizzle, 3) & 0x3); if ((src_reg->negate & 0xf) == NEGATE_XYZW) src = ureg_negate(src); @@ -4024,8 +4020,8 @@ translate_src( struct st_translate *t, } static void -compile_tgsi_instruction(struct st_translate *t, - const struct glsl_to_tgsi_instruction *inst) +compile_tgsi_instruction(struct st_translate *t, + const struct glsl_to_tgsi_instruction *inst) { struct ureg_program *ureg = t->ureg; GLuint i; @@ -4034,29 +4030,29 @@ compile_tgsi_instruction(struct st_translate *t, unsigned num_dst; unsigned num_src; - num_dst = num_inst_dst_regs( inst->op ); - num_src = num_inst_src_regs( inst->op ); + num_dst = num_inst_dst_regs(inst->op); + num_src = num_inst_src_regs(inst->op); if (num_dst) - dst[0] = translate_dst( t, - &inst->dst, - inst->saturate); + dst[0] = translate_dst(t, + &inst->dst, + inst->saturate); for (i = 0; i < num_src; i++) - src[i] = translate_src( t, &inst->src[i] ); + src[i] = translate_src(t, &inst->src[i]); - switch( inst->op ) { + switch(inst->op) { case TGSI_OPCODE_BGNLOOP: case TGSI_OPCODE_CAL: case TGSI_OPCODE_ELSE: case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_IF: - debug_assert(num_dst == 0); - ureg_label_insn( ureg, - inst->op, - src, num_src, - get_label( t, - inst->op == TGSI_OPCODE_CAL ? 
inst->function->sig_id : 0 )); + assert(num_dst == 0); + ureg_label_insn(ureg, + inst->op, + src, num_src, + get_label(t, + inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); return; case TGSI_OPCODE_TEX: @@ -4065,27 +4061,23 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: src[num_src++] = t->samplers[inst->sampler]; - ureg_tex_insn( ureg, - inst->op, - dst, num_dst, - translate_texture_target( inst->tex_target, - inst->tex_shadow ), - src, num_src ); + ureg_tex_insn(ureg, + inst->op, + dst, num_dst, + translate_texture_target(inst->tex_target, inst->tex_shadow), + src, num_src); return; case TGSI_OPCODE_SCS: - dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); + ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); break; default: - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); + ureg_insn(ureg, + inst->op, + dst, num_dst, + src, num_src); break; } } @@ -4095,9 +4087,9 @@ compile_tgsi_instruction(struct st_translate *t, * Basically, add (adjX, adjY) to the fragment position. */ static void -emit_adjusted_wpos( struct st_translate *t, - const struct gl_program *program, - GLfloat adjX, GLfloat adjY) +emit_adjusted_wpos(struct st_translate *t, + const struct gl_program *program, + float adjX, float adjY) { struct ureg_program *ureg = t->ureg; struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); @@ -4119,9 +4111,9 @@ emit_adjusted_wpos( struct st_translate *t, * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 
*/ static void -emit_wpos_inversion( struct st_translate *t, - const struct gl_program *program, - boolean invert) +emit_wpos_inversion(struct st_translate *t, + const struct gl_program *program, + bool invert) { struct ureg_program *ureg = t->ureg; @@ -4140,7 +4132,7 @@ emit_wpos_inversion( struct st_translate *t, unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, wposTransformState); - struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); struct ureg_dst wpos_temp; struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; @@ -4149,26 +4141,26 @@ emit_wpos_inversion( struct st_translate *t, if (wpos_input.File == TGSI_FILE_TEMPORARY) wpos_temp = ureg_dst(wpos_input); else { - wpos_temp = ureg_DECL_temporary( ureg ); - ureg_MOV( ureg, wpos_temp, wpos_input ); + wpos_temp = ureg_DECL_temporary(ureg); + ureg_MOV(ureg, wpos_temp, wpos_input); } if (invert) { /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy */ - ureg_MAD( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - wpos_input, - ureg_scalar(wpostrans, 0), - ureg_scalar(wpostrans, 1)); + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); } else { /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww */ - ureg_MAD( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - wpos_input, - ureg_scalar(wpostrans, 2), - ureg_scalar(wpostrans, 3)); + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); } /* Use wpos_temp as position input from here on: @@ -4312,7 +4304,7 @@ st_translate_program( const GLuint outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], - boolean passthrough_edgeflags ) + boolean passthrough_edgeflags) { struct st_translate translate, *t; unsigned i; 
@@ -4358,27 +4350,24 @@ st_translate_program( for (i = 0; i < numOutputs; i++) { switch (outputSemanticName[i]) { case TGSI_SEMANTIC_POSITION: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_POSITION, /* Z / Depth */ - outputSemanticIndex[i] ); - - t->outputs[i] = ureg_writemask( t->outputs[i], - TGSI_WRITEMASK_Z ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_POSITION, /* Z/Depth */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); break; case TGSI_SEMANTIC_STENCIL: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_STENCIL, /* Stencil */ - outputSemanticIndex[i] ); - t->outputs[i] = ureg_writemask( t->outputs[i], - TGSI_WRITEMASK_Y ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_STENCIL, /* Stencil */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); break; case TGSI_SEMANTIC_COLOR: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_COLOR, - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i]); break; default: - debug_assert(0); + assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); return PIPE_ERROR_BAD_INPUT; } } @@ -4392,9 +4381,9 @@ st_translate_program( } for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output( ureg, - outputSemanticName[i], - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); } } else { @@ -4405,9 +4394,9 @@ st_translate_program( } for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output( ureg, - outputSemanticName[i], - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { /* Writing to the point size result register requires special * handling to implement clamping. 
@@ -4421,8 +4410,8 @@ st_translate_program( unsigned pointSizeClampConst = _mesa_add_state_reference(proginfo->Parameters, pointSizeClampState); - struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); - t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); + struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); + t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); t->pointSizeResult = t->outputs[i]; t->pointSizeOutIndex = i; t->outputs[i] = psizregtemp; @@ -4435,8 +4424,8 @@ st_translate_program( /* Declare address register. */ if (program->num_address_regs > 0) { - debug_assert( program->num_address_regs == 1 ); - t->address[0] = ureg_DECL_address( ureg ); + assert(program->num_address_regs == 1); + t->address[0] = ureg_DECL_address(ureg); } /* Declare misc input registers @@ -4461,7 +4450,7 @@ st_translate_program( */ for (i = 0; i < (unsigned)program->next_temp; i++) { /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ - t->temps[i] = ureg_DECL_temporary( t->ureg ); + t->temps[i] = ureg_DECL_temporary(t->ureg); } } @@ -4469,7 +4458,7 @@ st_translate_program( * so we put all the translated regs in t->constants. 
*/ if (proginfo->Parameters) { - t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); + t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); if (t->constants == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; @@ -4482,7 +4471,7 @@ st_translate_program( case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - t->constants[i] = ureg_DECL_constant( ureg, i ); + t->constants[i] = ureg_DECL_constant(ureg, i); break; /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect @@ -4493,9 +4482,9 @@ st_translate_program( */ case PROGRAM_CONSTANT: if (program->indirect_addr_consts) - t->constants[i] = ureg_DECL_constant( ureg, i ); + t->constants[i] = ureg_DECL_constant(ureg, i); else - t->constants[i] = emit_immediate( t, proginfo->Parameters, i ); + t->constants[i] = emit_immediate(t, proginfo->Parameters, i); break; default: break; @@ -4505,27 +4494,28 @@ st_translate_program( /* Emit immediate values. 
*/ - t->immediates = (struct ureg_src *)CALLOC( program->immediates->NumParameters * sizeof(struct ureg_src) ); + t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src)); if (t->immediates == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } for (i = 0; i < program->immediates->NumParameters; i++) { - t->immediates[i] = emit_immediate( t, program->immediates, i ); + assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE); + t->immediates[i] = emit_immediate(t, program->immediates, i); } /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { if (program->samplers_used & (1 << i)) { - t->samplers[i] = ureg_DECL_sampler( ureg, i ); + t->samplers[i] = ureg_DECL_sampler(ureg, i); } } /* Emit each instruction in turn: */ foreach_iter(exec_list_iterator, iter, program->instructions) { - set_insn_start( t, ureg_get_instruction_number( ureg )); - compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() ); + set_insn_start(t, ureg_get_instruction_number(ureg)); + compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); if (t->prevInstWrotePointSize && proginfo->Id) { /* The previous instruction wrote to the (fake) vertex point size @@ -4535,14 +4525,14 @@ st_translate_program( * Note that we can't do this easily at the end of program due to * possible early return. 
*/ - set_insn_start( t, ureg_get_instruction_number( ureg )); - ureg_MAX( t->ureg, - ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 1,1,1,1)); - ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 2,2,2,2)); + set_insn_start(t, ureg_get_instruction_number(ureg)); + ureg_MAX(t->ureg, + ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 1,1,1,1)); + ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 2,2,2,2)); } t->prevInstWrotePointSize = GL_FALSE; } @@ -4550,9 +4540,8 @@ st_translate_program( /* Fix up all emitted labels: */ for (i = 0; i < t->labels_count; i++) { - ureg_fixup_label( ureg, - t->labels[i].token, - t->insn[t->labels[i].branch_target] ); + ureg_fixup_label(ureg, t->labels[i].token, + t->insn[t->labels[i].branch_target]); } out: @@ -4582,7 +4571,7 @@ get_mesa_program(struct gl_context *ctx, struct gl_program *prog; GLenum target; const char *target_string; - GLboolean progress; + bool progress; struct gl_shader_compiler_options *options = &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; From f751730ad003bb19ce85bc4d0abddaf40edde6c1 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 22 Jul 2011 13:24:42 -0500 Subject: [PATCH 178/600] glsl_to_tgsi: update comments --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 23 +++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 952900a1fb5..3a69a439822 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -236,7 +236,7 @@ public: /** * 
identifier of this function signature used by the program. * - * At the point that Mesa instructions for function calls are + * At the point that TGSI instructions for function calls are * generated, we don't know the address of the first instruction of * the function body. So we make the BranchTarget that is called a * small integer and rewrite them during set_branchtargets(). @@ -251,10 +251,9 @@ public: glsl_to_tgsi_instruction *bgn_inst; /** - * Index of the first instruction of the function body in actual - * Mesa IR. + * Index of the first instruction of the function body in actual TGSI. * - * Set after convertion from glsl_to_tgsi_instruction to prog_instruction. + * Set after conversion from glsl_to_tgsi_instruction to TGSI. */ int inst; @@ -1672,7 +1671,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) } else { st_src_reg array_base = this->result; /* Variable index array dereference. It eats the "vec4" of the - * base of the array and an index that offsets the Mesa register + * base of the array and an index that offsets the TGSI register * index. */ ir->array_index->accept(this); @@ -1879,7 +1878,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) /* Swizzle a small RHS vector into the channels being written. * * glsl ir treats write_mask as dictating how many channels are - * present on the RHS while Mesa IR treats write_mask as just + * present on the RHS while TGSI treats write_mask as just * showing which channels of the vec4 RHS get written. */ for (int i = 0; i < 4; i++) { @@ -2202,8 +2201,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) /* Put our coords in a temp. We'll need to modify them for shadow, * projection, or LOD, so the only case we'd use it as is is if - * we're doing plain old texturing. Mesa IR optimization should - * handle cleaning up our mess in that case. + * we're doing plain old texturing. The optimization passes on + * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 
*/ coord = get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); @@ -3799,9 +3798,9 @@ static unsigned *get_label(struct st_translate *t, unsigned branch_target) } /** - * Called prior to emitting the TGSI code for each Mesa instruction. + * Called prior to emitting the TGSI code for each instruction. * Allocate additional space for instructions if needed. - * Update the insn[] array so the next Mesa instruction points to + * Update the insn[] array so the next glsl_to_tgsi_instruction points to * the next TGSI instruction. */ static void set_insn_start(struct st_translate *t, unsigned start) @@ -3856,7 +3855,7 @@ emit_immediate(struct st_translate *t, } /** - * Map a Mesa dst register to a TGSI ureg_dst register. + * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. */ static struct ureg_dst dst_register(struct st_translate *t, @@ -3898,7 +3897,7 @@ dst_register(struct st_translate *t, } /** - * Map a Mesa src register to a TGSI ureg_src register. + * Map a glsl_to_tgsi src register to a TGSI ureg_src register. 
*/ static struct ureg_src src_register(struct st_translate *t, From 3354a5b56398f90fc36ab14b6444aae27b50e859 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 15:20:19 -0500 Subject: [PATCH 179/600] glsl_to_tgsi: rework immediate tracking to not use gl_program_parameter_list --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 135 ++++++++++++++------- 1 file changed, 88 insertions(+), 47 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3a69a439822..6039488f26b 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -229,6 +229,20 @@ public: ir_variable *var; /* variable that maps to this, if any */ }; +class immediate_storage : public exec_node { +public: + immediate_storage(gl_constant_value *values, int size, int type) + { + memcpy(this->values, values, size * sizeof(gl_constant_value)); + this->size = size; + this->type = type; + } + + gl_constant_value values[4]; + int size; /**< Number of components (1-4) */ + int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ +}; + class function_entry : public exec_node { public: ir_function_signature *sig; @@ -272,7 +286,6 @@ public: struct gl_program *prog; struct gl_shader_program *shader_program; struct gl_shader_compiler_options *options; - struct gl_program_parameter_list *immediates; int next_temp; @@ -285,6 +298,9 @@ public: variable_storage *find_variable_storage(ir_variable *var); + int add_constant(gl_register_file file, gl_constant_value values[4], + int size, int datatype, GLuint *swizzle_out); + function_entry *get_function_signature(ir_function_signature *sig); st_src_reg get_temp(const glsl_type *type); @@ -326,6 +342,10 @@ public: /** List of variable_storage */ exec_list variables; + /** List of immediate_storage */ + exec_list immediates; + int num_immediates; + /** List of function_entry */ exec_list function_signatures; int next_signature_id; @@ -808,6 
+828,42 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, } } +int +glsl_to_tgsi_visitor::add_constant(gl_register_file file, + gl_constant_value values[4], int size, int datatype, + GLuint *swizzle_out) +{ + if (file == PROGRAM_CONSTANT) { + return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, + size, datatype, swizzle_out); + } else { + int index = 0; + immediate_storage *entry; + assert(file == PROGRAM_IMMEDIATE); + fprintf(stderr, "adding immediate\n"); + + /* Search immediate storage to see if we already have an identical + * immediate that we can use instead of adding a duplicate entry. + */ + foreach_iter(exec_list_iterator, iter, this->immediates) { + entry = (immediate_storage *)iter.get(); + + if (entry->size == size && + entry->type == datatype && + !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { + return index; + } + index++; + } + + /* Add this immediate to the list. */ + entry = new(mem_ctx) immediate_storage(values, size, datatype); + this->immediates.push_tail(entry); + this->num_immediates++; + return index; + } +} + struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { @@ -815,8 +871,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val) union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, - GL_FLOAT, &src.swizzle); + src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); return src; } @@ -830,8 +885,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val) assert(glsl_version >= 130); uval.i = val; - src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, - GL_INT, &src.swizzle); + src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); return src; } @@ -1941,12 +1995,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) gl_constant_value *values = (gl_constant_value *) stack_vals; GLenum gl_type = GL_NONE; unsigned int i; - gl_register_file file; - 
gl_program_parameter_list *param_list; static int in_array = 0; - - file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; - param_list = in_array ? this->prog->Parameters : this->immediates; + gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; /* Unfortunately, 4 floats is all we can get into * _mesa_add_typed_unnamed_constant. So, make a temp to store an @@ -2009,11 +2059,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; src = st_src_reg(file, -1, ir->type->base_type); - src.index = _mesa_add_typed_unnamed_constant(param_list, - values, - ir->type->vector_elements, - GL_FLOAT, - &src.swizzle); + src.index = add_constant(file, + values, + ir->type->vector_elements, + GL_FLOAT, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -2062,9 +2112,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } this->result = st_src_reg(file, -1, ir->type); - this->result.index = _mesa_add_typed_unnamed_constant(param_list, - values, ir->type->vector_elements, gl_type, - &this->result.swizzle); + this->result.index = add_constant(file, + values, + ir->type->vector_elements, + gl_type, + &this->result.swizzle); } function_entry * @@ -2441,17 +2493,16 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() result.file = PROGRAM_UNDEFINED; next_temp = 1; next_signature_id = 1; + num_immediates = 0; current_function = NULL; num_address_regs = 0; indirect_addr_temps = false; indirect_addr_consts = false; - immediates = _mesa_new_parameter_list(); mem_ctx = ralloc_context(NULL); } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { - _mesa_free_parameter_list(immediates); ralloc_free(mem_ctx); } @@ -3538,8 +3589,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; - 
_mesa_free_parameter_list(v->immediates); - v->immediates = _mesa_clone_parameter_list(original->immediates); + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); /* * Get initial pixel color from the texture. @@ -3667,8 +3717,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; - _mesa_free_parameter_list(v->immediates); - v->immediates = _mesa_clone_parameter_list(original->immediates); + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); @@ -3822,32 +3871,20 @@ static void set_insn_start(struct st_translate *t, unsigned start) */ static struct ureg_src emit_immediate(struct st_translate *t, - struct gl_program_parameter_list *params, - int index) + gl_constant_value values[4], + int type, int size) { struct ureg_program *ureg = t->ureg; - switch(params->Parameters[index].DataType) + switch(type) { case GL_FLOAT: - case GL_FLOAT_VEC2: - case GL_FLOAT_VEC3: - case GL_FLOAT_VEC4: - return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4); + return ureg_DECL_immediate(ureg, &values[0].f, size); case GL_INT: - case GL_INT_VEC2: - case GL_INT_VEC3: - case GL_INT_VEC4: - return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4); + return ureg_DECL_immediate_int(ureg, &values[0].i, size); case GL_UNSIGNED_INT: - case GL_UNSIGNED_INT_VEC2: - case GL_UNSIGNED_INT_VEC3: - case GL_UNSIGNED_INT_VEC4: case GL_BOOL: - case GL_BOOL_VEC2: - case GL_BOOL_VEC3: - case GL_BOOL_VEC4: - return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4); + return ureg_DECL_immediate_uint(ureg, &values[0].u, size); default: assert(!"should not get here - type must be float, int, uint, 
or bool"); return ureg_src_undef(); @@ -4483,7 +4520,10 @@ st_translate_program( if (program->indirect_addr_consts) t->constants[i] = ureg_DECL_constant(ureg, i); else - t->constants[i] = emit_immediate(t, proginfo->Parameters, i); + t->constants[i] = emit_immediate(t, + proginfo->Parameters->ParameterValues[i], + proginfo->Parameters->Parameters[i].DataType, + 4); break; default: break; @@ -4493,14 +4533,15 @@ st_translate_program( /* Emit immediate values. */ - t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src)); + t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); if (t->immediates == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } - for (i = 0; i < program->immediates->NumParameters; i++) { - assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE); - t->immediates[i] = emit_immediate(t, program->immediates, i); + i = 0; + foreach_iter(exec_list_iterator, iter, program->immediates) { + immediate_storage *imm = (immediate_storage *)iter.get(); + t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); } /* texture samplers */ From 10d31cb307f90a08fafed5c67945ffe53d279940 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 15:45:16 -0500 Subject: [PATCH 180/600] glsl_to_tgsi: lower all ir_quadop_vector expressions Unlike Mesa IR, TGSI doesn't have a SWZ opcode. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6039488f26b..0cbfc943a05 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4825,7 +4825,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; - progress = lower_quadop_vector(ir, true) || progress; + progress = lower_quadop_vector(ir, false) || progress; if (options->EmitNoIfs) { progress = lower_discard(ir) || progress; From 3e7fce9773ec332665326a785b6ed1fcf5bd578e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 16:36:10 -0500 Subject: [PATCH 181/600] glsl_to_tgsi: add each relative address to the previous This is a glsl_to_tgsi port of commit d6e1a8f71437. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 0cbfc943a05..f66e240a177 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1741,6 +1741,18 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) this->result, st_src_reg_for_float(element_size)); } + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. 
+ */ + if (src.reladdr != NULL) { + st_src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + src.reladdr = ralloc(mem_ctx, st_src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); } From 189e9f12c7d3a82d7dd28695935a83e4319bb267 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 16:39:40 -0500 Subject: [PATCH 182/600] glsl_to_tgsi: copy reladdr in st_src_reg(st_dst_reg) constructor This is a glsl_to_tgsi port of commit f7cd9a858c04. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f66e240a177..ba4074eecd5 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -174,7 +174,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; - this->reladdr = NULL; + this->reladdr = reg.reladdr; } st_dst_reg::st_dst_reg(st_src_reg reg) From 81b036b4d79423c194596461b098a525af0102c2 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 30 Jul 2011 16:44:49 -0700 Subject: [PATCH 183/600] i965/gen5+: Fix incorrect miptree layout for non-power-of-two cubemaps. For power-of-two sizes, h0 == mt->height0 since it's already a multiple of two. However, for NPOT, they're different; h1 should be computed based on the original size. Fixes piglit test "cubemap npot" and oglconform test "textureNPOT". NOTE: This is a candidate for stable release branches. 
Reviewed-by: Eric Anholt Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index f462f32b19a..46a417a08ed 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -60,7 +60,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, * given in Volume 1 of the BSpec. */ h0 = ALIGN(mt->height0, align_h); - h1 = ALIGN(minify(h0), align_h); + h1 = ALIGN(minify(mt->height0), align_h); qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h); if (mt->compressed) qpitch /= 4; From 586e741ac1fa222d041990b265e820f5aa11344d Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 28 Jul 2011 14:04:09 -0700 Subject: [PATCH 184/600] linker: Make linker_error set LinkStatus to false Remove the other places that set LinkStatus to false since they all immediately follow a call to linker_error. The function linker_error was previously known as linker_error_printf. The name was changed because it may seem surprising that a printf function will set an error flag. 
Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/glsl/ir_function_detect_recursion.cpp | 4 +- src/glsl/link_functions.cpp | 4 +- src/glsl/linker.cpp | 180 +++++++++++----------- src/glsl/linker.h | 2 +- 4 files changed, 90 insertions(+), 100 deletions(-) diff --git a/src/glsl/ir_function_detect_recursion.cpp b/src/glsl/ir_function_detect_recursion.cpp index 44a1cd0b950..a3b461818d3 100644 --- a/src/glsl/ir_function_detect_recursion.cpp +++ b/src/glsl/ir_function_detect_recursion.cpp @@ -311,9 +311,7 @@ emit_errors_linked(const void *key, void *data, void *closure) f->sig->function_name(), &f->sig->parameters); - linker_error_printf(prog, - "function `%s' has static recursion.\n", - proto); + linker_error(prog, "function `%s' has static recursion.\n", proto); ralloc_free(proto); prog->LinkStatus = false; } diff --git a/src/glsl/link_functions.cpp b/src/glsl/link_functions.cpp index 7ba760daa1a..d40f771e342 100644 --- a/src/glsl/link_functions.cpp +++ b/src/glsl/link_functions.cpp @@ -91,8 +91,8 @@ public: if (sig == NULL) { /* FINISHME: Log the full signature of unresolved function. */ - linker_error_printf(this->prog, "unresolved reference to function " - "`%s'\n", name); + linker_error(this->prog, "unresolved reference to function `%s'\n", + name); this->success = false; return visit_stop; } diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index fe570b6cc45..35d893f499e 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -164,7 +164,7 @@ private: void -linker_error_printf(gl_shader_program *prog, const char *fmt, ...) +linker_error(gl_shader_program *prog, const char *fmt, ...) { va_list ap; @@ -172,6 +172,8 @@ linker_error_printf(gl_shader_program *prog, const char *fmt, ...) 
va_start(ap, fmt); ralloc_vasprintf_append(&prog->InfoLog, fmt, ap); va_end(ap); + + prog->LinkStatus = false; } @@ -243,8 +245,7 @@ validate_vertex_shader_executable(struct gl_shader_program *prog, find_assignment_visitor find("gl_Position"); find.run(shader->ir); if (!find.variable_found()) { - linker_error_printf(prog, - "vertex shader does not write to `gl_Position'\n"); + linker_error(prog, "vertex shader does not write to `gl_Position'\n"); return false; } @@ -271,8 +272,8 @@ validate_fragment_shader_executable(struct gl_shader_program *prog, frag_data.run(shader->ir); if (frag_color.variable_found() && frag_data.variable_found()) { - linker_error_printf(prog, "fragment shader writes to both " - "`gl_FragColor' and `gl_FragData'\n"); + linker_error(prog, "fragment shader writes to both " + "`gl_FragColor' and `gl_FragData'\n"); return false; } @@ -357,11 +358,11 @@ cross_validate_globals(struct gl_shader_program *prog, existing->type = var->type; } } else { - linker_error_printf(prog, "%s `%s' declared as type " - "`%s' and type `%s'\n", - mode_string(var), - var->name, var->type->name, - existing->type->name); + linker_error(prog, "%s `%s' declared as type " + "`%s' and type `%s'\n", + mode_string(var), + var->name, var->type->name, + existing->type->name); return false; } } @@ -369,9 +370,9 @@ cross_validate_globals(struct gl_shader_program *prog, if (var->explicit_location) { if (existing->explicit_location && (var->location != existing->location)) { - linker_error_printf(prog, "explicit locations for %s " - "`%s' have differing values\n", - mode_string(var), var->name); + linker_error(prog, "explicit locations for %s " + "`%s' have differing values\n", + mode_string(var), var->name); return false; } @@ -392,12 +393,12 @@ cross_validate_globals(struct gl_shader_program *prog, bool layout_declared = var->depth_layout != ir_depth_layout_none; bool layout_differs = var->depth_layout != existing->depth_layout; if (layout_declared && layout_differs) { - 
linker_error_printf(prog, + linker_error(prog, "All redeclarations of gl_FragDepth in all fragment shaders " "in a single program must have the same set of qualifiers."); } if (var->used && layout_differs) { - linker_error_printf(prog, + linker_error(prog, "If gl_FragDepth is redeclared with a layout qualifier in" "any fragment shader, it must be redeclared with the same" "layout qualifier in all fragment shaders that have" @@ -410,9 +411,9 @@ cross_validate_globals(struct gl_shader_program *prog, if (var->constant_value != NULL) { if (existing->constant_value != NULL) { if (!var->constant_value->has_value(existing->constant_value)) { - linker_error_printf(prog, "initializers for %s " - "`%s' have differing values\n", - mode_string(var), var->name); + linker_error(prog, "initializers for %s " + "`%s' have differing values\n", + mode_string(var), var->name); return false; } } else @@ -433,15 +434,15 @@ cross_validate_globals(struct gl_shader_program *prog, } if (existing->invariant != var->invariant) { - linker_error_printf(prog, "declarations for %s `%s' have " - "mismatching invariant qualifiers\n", - mode_string(var), var->name); + linker_error(prog, "declarations for %s `%s' have " + "mismatching invariant qualifiers\n", + mode_string(var), var->name); return false; } if (existing->centroid != var->centroid) { - linker_error_printf(prog, "declarations for %s `%s' have " - "mismatching centroid qualifiers\n", - mode_string(var), var->name); + linker_error(prog, "declarations for %s `%s' have " + "mismatching centroid qualifiers\n", + mode_string(var), var->name); return false; } } else @@ -529,13 +530,12 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, */ if (!output->type->is_array() || (strncmp("gl_", output->name, 3) != 0)) { - linker_error_printf(prog, - "%s shader output `%s' declared as " - "type `%s', but %s shader input declared " - "as type `%s'\n", - producer_stage, output->name, - output->type->name, - consumer_stage, 
input->type->name); + linker_error(prog, + "%s shader output `%s' declared as type `%s', " + "but %s shader input declared as type `%s'\n", + producer_stage, output->name, + output->type->name, + consumer_stage, input->type->name); return false; } } @@ -543,40 +543,40 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, /* Check that all of the qualifiers match between stages. */ if (input->centroid != output->centroid) { - linker_error_printf(prog, - "%s shader output `%s' %s centroid qualifier, " - "but %s shader input %s centroid qualifier\n", - producer_stage, - output->name, - (output->centroid) ? "has" : "lacks", - consumer_stage, - (input->centroid) ? "has" : "lacks"); + linker_error(prog, + "%s shader output `%s' %s centroid qualifier, " + "but %s shader input %s centroid qualifier\n", + producer_stage, + output->name, + (output->centroid) ? "has" : "lacks", + consumer_stage, + (input->centroid) ? "has" : "lacks"); return false; } if (input->invariant != output->invariant) { - linker_error_printf(prog, - "%s shader output `%s' %s invariant qualifier, " - "but %s shader input %s invariant qualifier\n", - producer_stage, - output->name, - (output->invariant) ? "has" : "lacks", - consumer_stage, - (input->invariant) ? "has" : "lacks"); + linker_error(prog, + "%s shader output `%s' %s invariant qualifier, " + "but %s shader input %s invariant qualifier\n", + producer_stage, + output->name, + (output->invariant) ? "has" : "lacks", + consumer_stage, + (input->invariant) ? 
"has" : "lacks"); return false; } if (input->interpolation != output->interpolation) { - linker_error_printf(prog, - "%s shader output `%s' specifies %s " - "interpolation qualifier, " - "but %s shader input specifies %s " - "interpolation qualifier\n", - producer_stage, - output->name, - output->interpolation_string(), - consumer_stage, - input->interpolation_string()); + linker_error(prog, + "%s shader output `%s' specifies %s " + "interpolation qualifier, " + "but %s shader input specifies %s " + "interpolation qualifier\n", + producer_stage, + output->name, + output->interpolation_string(), + consumer_stage, + input->interpolation_string()); return false; } } @@ -823,9 +823,8 @@ link_intrastage_shaders(void *mem_ctx, if ((other_sig != NULL) && other_sig->is_defined && !other_sig->is_builtin) { - linker_error_printf(prog, - "function `%s' is multiply defined", - f->name); + linker_error(prog, "function `%s' is multiply defined", + f->name); return NULL; } } @@ -849,9 +848,9 @@ link_intrastage_shaders(void *mem_ctx, } if (main == NULL) { - linker_error_printf(prog, "%s shader lacks `main'\n", - (shader_list[0]->Type == GL_VERTEX_SHADER) - ? "vertex" : "fragment"); + linker_error(prog, "%s shader lacks `main'\n", + (shader_list[0]->Type == GL_VERTEX_SHADER) + ? "vertex" : "fragment"); return NULL; } @@ -1309,10 +1308,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog, * attribute overlaps any previously allocated bits. 
*/ if ((~(use_mask << attr) & used_locations) != used_locations) { - linker_error_printf(prog, - "insufficient contiguous attribute locations " - "available for vertex shader input `%s'", - var->name); + linker_error(prog, + "insufficient contiguous attribute locations " + "available for vertex shader input `%s'", + var->name); return false; } @@ -1353,11 +1352,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog, if ((var->location >= (int)(max_index + generic_base)) || (var->location < 0)) { - linker_error_printf(prog, - "invalid explicit location %d specified for " - "`%s'\n", - (var->location < 0) ? var->location : attr, - var->name); + linker_error(prog, + "invalid explicit location %d specified for `%s'\n", + (var->location < 0) ? var->location : attr, + var->name); return false; } else if (var->location >= generic_base) { used_locations |= (use_mask << attr); @@ -1406,10 +1404,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog, const char *const string = (target_index == MESA_SHADER_VERTEX) ? "vertex shader input" : "fragment shader output"; - linker_error_printf(prog, - "insufficient contiguous attribute locations " - "available for %s `%s'", - string, to_assign[i].var->name); + linker_error(prog, + "insufficient contiguous attribute locations " + "available for %s `%s'", + string, to_assign[i].var->name); return false; } @@ -1525,9 +1523,8 @@ assign_varying_locations(struct gl_context *ctx, * "glsl1-varying read but not written" in piglit. 
*/ - linker_error_printf(prog, "fragment shader varying %s not written " - "by vertex shader\n.", var->name); - prog->LinkStatus = false; + linker_error(prog, "fragment shader varying %s not written " + "by vertex shader\n.", var->name); } /* An 'in' variable is only really a shader input if its @@ -1544,17 +1541,17 @@ assign_varying_locations(struct gl_context *ctx, if (ctx->API == API_OPENGLES2 || prog->Version == 100) { if (varying_vectors > ctx->Const.MaxVarying) { - linker_error_printf(prog, "shader uses too many varying vectors " - "(%u > %u)\n", - varying_vectors, ctx->Const.MaxVarying); + linker_error(prog, "shader uses too many varying vectors " + "(%u > %u)\n", + varying_vectors, ctx->Const.MaxVarying); return false; } } else { const unsigned float_components = varying_vectors * 4; if (float_components > ctx->Const.MaxVarying * 4) { - linker_error_printf(prog, "shader uses too many varying components " - "(%u > %u)\n", - float_components, ctx->Const.MaxVarying * 4); + linker_error(prog, "shader uses too many varying components " + "(%u > %u)\n", + float_components, ctx->Const.MaxVarying * 4); return false; } } @@ -1618,8 +1615,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) assert(max_version <= 130); if ((max_version >= 130 || min_version == 100) && min_version != max_version) { - linker_error_printf(prog, "all shaders must use same shading " - "language version\n"); + linker_error(prog, "all shaders must use same shading " + "language version\n"); goto done; } @@ -1720,12 +1717,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) * FINISHME: at least 16, so hardcode 16 for now. 
*/ if (!assign_attribute_or_color_locations(prog, MESA_SHADER_VERTEX, 16)) { - prog->LinkStatus = false; goto done; } if (!assign_attribute_or_color_locations(prog, MESA_SHADER_FRAGMENT, ctx->Const.MaxDrawBuffers)) { - prog->LinkStatus = false; goto done; } @@ -1742,7 +1737,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (!assign_varying_locations(ctx, prog, prog->_LinkedShaders[prev], prog->_LinkedShaders[i])) { - prog->LinkStatus = false; goto done; } @@ -1774,11 +1768,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) */ if (ctx->API == API_OPENGLES2 || prog->Version == 100) { if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) { - linker_error_printf(prog, "program lacks a vertex shader\n"); - prog->LinkStatus = false; + linker_error(prog, "program lacks a vertex shader\n"); } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) { - linker_error_printf(prog, "program lacks a fragment shader\n"); - prog->LinkStatus = false; + linker_error(prog, "program lacks a fragment shader\n"); } } diff --git a/src/glsl/linker.h b/src/glsl/linker.h index a8ce16a7ec1..dfae073c274 100644 --- a/src/glsl/linker.h +++ b/src/glsl/linker.h @@ -26,7 +26,7 @@ #define GLSL_LINKER_H extern void -linker_error_printf(gl_shader_program *prog, const char *fmt, ...); +linker_error(gl_shader_program *prog, const char *fmt, ...); extern bool link_function_calls(gl_shader_program *prog, gl_shader *main, From 379a32f42ebca9feeb024633f7774661619fd62e Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 28 Jul 2011 14:09:06 -0700 Subject: [PATCH 185/600] linker: Make linker_{error,warning} generally available linker_warning is a new function. It's identical to linker_error except that it doesn't set LinkStatus=false and it prepends "warning: " on messages instead of "error: ". 
Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/glsl/ir_function_detect_recursion.cpp | 1 + src/glsl/linker.cpp | 13 +++++++++++++ src/glsl/linker.h | 3 --- src/glsl/program.h | 8 ++++++++ 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/glsl/ir_function_detect_recursion.cpp b/src/glsl/ir_function_detect_recursion.cpp index a3b461818d3..8f805bf1ba9 100644 --- a/src/glsl/ir_function_detect_recursion.cpp +++ b/src/glsl/ir_function_detect_recursion.cpp @@ -125,6 +125,7 @@ #include "glsl_parser_extras.h" #include "linker.h" #include "program/hash_table.h" +#include "program.h" struct call_node : public exec_node { class function *func; diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 35d893f499e..19eb9b5ff6f 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -177,6 +177,19 @@ linker_error(gl_shader_program *prog, const char *fmt, ...) } +void +linker_warning(gl_shader_program *prog, const char *fmt, ...) +{ + va_list ap; + + ralloc_strcat(&prog->InfoLog, "error: "); + va_start(ap, fmt); + ralloc_vasprintf_append(&prog->InfoLog, fmt, ap); + va_end(ap); + +} + + void invalidate_variable_locations(gl_shader *sh, enum ir_variable_mode mode, int generic_base) diff --git a/src/glsl/linker.h b/src/glsl/linker.h index dfae073c274..769cf68b6ad 100644 --- a/src/glsl/linker.h +++ b/src/glsl/linker.h @@ -25,9 +25,6 @@ #ifndef GLSL_LINKER_H #define GLSL_LINKER_H -extern void -linker_error(gl_shader_program *prog, const char *fmt, ...); - extern bool link_function_calls(gl_shader_program *prog, gl_shader *main, gl_shader **shader_list, unsigned num_shaders); diff --git a/src/glsl/program.h b/src/glsl/program.h index db602fa9ec2..437ca1462fa 100644 --- a/src/glsl/program.h +++ b/src/glsl/program.h @@ -25,3 +25,11 @@ extern void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog); + +extern void +linker_error(gl_shader_program *prog, const char *fmt, ...) 
+ PRINTFLIKE(2, 3); + +extern void +linker_warning(gl_shader_program *prog, const char *fmt, ...) + PRINTFLIKE(2, 3); From 89193933cbd322cd08fb54232411a8a9221fcca8 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 28 Jul 2011 15:10:17 -0700 Subject: [PATCH 186/600] mesa: Ensure that gl_shader_program::InfoLog is never NULL This prevents assertion failures in ralloc_strcat. The ralloc_free in _mesa_free_shader_program_data can be omitted because freeing the gl_shader_program in _mesa_delete_shader_program will take care of this automatically. A bunch of this code could use a refactor to use ralloc a bit more effectively. A bunch of the things that are allocated with malloc and owned by the gl_shader_program should be allocated with ralloc (using the gl_shader_program as the context). Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/glsl/main.cpp | 1 + src/mesa/main/shaderobj.c | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index 9f85096e1a1..9b8a50738ac 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -221,6 +221,7 @@ main(int argc, char **argv) whole_program = rzalloc (NULL, struct gl_shader_program); assert(whole_program != NULL); + whole_program->InfoLog = ralloc_strdup(whole_program, ""); for (/* empty */; argc > optind; optind++) { whole_program->Shaders = diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 33d91ad594d..f128648f477 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -244,6 +244,8 @@ _mesa_init_shader_program(struct gl_context *ctx, struct gl_shader_program *prog prog->Geom.InputType = GL_TRIANGLES; prog->Geom.OutputType = GL_TRIANGLE_STRIP; #endif + + prog->InfoLog = ralloc_strdup(prog, ""); } /** @@ -283,6 +285,10 @@ _mesa_clear_shader_program_data(struct gl_context *ctx, _mesa_free_parameter_list(shProg->Varying); shProg->Varying = NULL; } + + assert(shProg->InfoLog != NULL); + 
ralloc_free(shProg->InfoLog); + shProg->InfoLog = ralloc_strdup(shProg, ""); } @@ -317,11 +323,6 @@ _mesa_free_shader_program_data(struct gl_context *ctx, shProg->Shaders = NULL; } - if (shProg->InfoLog) { - ralloc_free(shProg->InfoLog); - shProg->InfoLog = NULL; - } - /* Transform feedback varying vars */ for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) { free(shProg->TransformFeedback.VaryingNames[i]); From 8aadd89d07d750aadd10989fa9c81f8a2fdd98e2 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 25 Jul 2011 15:55:59 -0700 Subject: [PATCH 187/600] ir_to_mesa: Use linker_error instead of fail_link The functions were almost identical. Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 53 ++++++++++++++------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 8b4a535b75f..a0188128e2a 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -331,20 +331,6 @@ dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); - -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) 
-{ - va_list args; - va_start(args, fmt); - ralloc_vasprintf_append(&prog->InfoLog, fmt, args); - va_end(args); - - prog->LinkStatus = GL_FALSE; -} - static int swizzle_for_size(int size) { @@ -789,10 +775,11 @@ ir_to_mesa_visitor::visit(ir_variable *ir) if (storage->file == PROGRAM_TEMPORARY && dst.index != storage->index + (int) ir->num_state_slots) { - fail_link(this->shader_program, - "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", - ir->name, dst.index - storage->index, - type_size(ir->type)); + linker_error(this->shader_program, + "failed to load builtin uniform `%s' " + "(%d/%d regs loaded)\n", + ir->name, dst.index - storage->index, + type_size(ir->type)); } } } @@ -2413,29 +2400,32 @@ check_resources(const struct gl_context *ctx, case GL_VERTEX_PROGRAM_ARB: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxVertexTextureImageUnits) { - fail_link(shader_program, "Too many vertex shader texture samplers"); + linker_error(shader_program, + "Too many vertex shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many vertex shader constants"); + linker_error(shader_program, "Too many vertex shader constants"); } break; case MESA_GEOMETRY_PROGRAM: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxGeometryTextureImageUnits) { - fail_link(shader_program, "Too many geometry shader texture samplers"); + linker_error(shader_program, + "Too many geometry shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { - fail_link(shader_program, "Too many geometry shader constants"); + linker_error(shader_program, "Too many geometry shader constants"); } break; case GL_FRAGMENT_PROGRAM_ARB: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxTextureImageUnits) { - fail_link(shader_program, "Too many fragment shader texture samplers"); + linker_error(shader_program, + "Too many fragment shader texture samplers"); } if 
(prog->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many fragment shader constants"); + linker_error(shader_program, "Too many fragment shader constants"); } break; default: @@ -2550,9 +2540,10 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, * from _mesa_add_uniform) has to match what the linker chose. */ if (index != parameter_index) { - fail_link(shader_program, "Allocation of uniform `%s' to target " - "failed (%d vs %d)\n", - uniform->Name, index, parameter_index); + linker_error(shader_program, + "Allocation of uniform `%s' to target failed " + "(%d vs %d)\n", + uniform->Name, index, parameter_index); } } } @@ -2585,8 +2576,8 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, int loc = _mesa_get_uniform_location(ctx, shader_program, name); if (loc == -1) { - fail_link(shader_program, - "Couldn't find uniform for initializer %s\n", name); + linker_error(shader_program, + "Couldn't find uniform for initializer %s\n", name); return; } @@ -2987,7 +2978,7 @@ get_mesa_program(struct gl_context *ctx, prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { - fail_link(shader_program, "Couldn't flatten if statement\n"); + linker_error(shader_program, "Couldn't flatten if statement\n"); } switch (mesa_inst->Opcode) { @@ -3258,7 +3249,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) for (i = 0; i < prog->NumShaders; i++) { if (!prog->Shaders[i]->CompileStatus) { - fail_link(prog, "linking with uncompiled shader"); + linker_error(prog, "linking with uncompiled shader"); prog->LinkStatus = GL_FALSE; } } From 322c3bf9dc4c6edbf5a8793475ce1307e1c0186b Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 25 Jul 2011 15:58:07 -0700 Subject: [PATCH 188/600] ir_to_mesa: Emit warnings instead of errors for IR that can't be lowered Rely on the driver to do the right thing. 
This probably means falling back to software. Page 88 of the OpenGL 2.1 spec specifically says: "A shader should not fail to compile, and a program object should not fail to link due to lack of instruction space or lack of temporary variables. Implementations should ensure that all valid shaders and program objects may be successfully compiled, linked and executed." There is no provision for saying "No" to a valid shader that is difficult for the hardware to handle, so stop doing that. On i915 this causes a large number of piglit tests to change from FAIL to WARN. The warning is because the driver still emits messages to stderr like "i915_program_error: Unsupported opcode: BGNLOOP". It also fixes ES2 conformance CorrectFull_frag and CorrectParse1_frag on i915 (and probably other hardware that can't handle loops). Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index a0188128e2a..382cda0c703 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2977,11 +2977,31 @@ get_mesa_program(struct gl_context *ctx, if (mesa_inst->SrcReg[src].RelAddr) prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; - if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { - linker_error(shader_program, "Couldn't flatten if statement\n"); - } - switch (mesa_inst->Opcode) { + case OPCODE_IF: + if (options->EmitNoIfs) { + linker_warning(shader_program, + "Couldn't flatten if-statement. " + "This will likely result in software " + "rasterization.\n"); + } + break; + case OPCODE_BGNLOOP: + if (options->EmitNoLoops) { + linker_warning(shader_program, + "Couldn't unroll loop. 
" + "This will likely result in software " + "rasterization.\n"); + } + break; + case OPCODE_CONT: + if (options->EmitNoCont) { + linker_warning(shader_program, + "Couldn't lower continue-statement. " + "This will likely result in software " + "rasterization.\n"); + } + break; case OPCODE_BGNSUB: inst->function->inst = i; mesa_inst->Comment = strdup(inst->function->sig->function_name()); From 3bb2f0dde1cd813a0b5e0b45be376f4d6606aeb8 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 25 Jul 2011 16:41:39 -0700 Subject: [PATCH 189/600] i915: Fail without crashing if a Mesa IR program uses too many registers This can only happen in GLSL shaders because assembly shaders that use too many temps are rejected by core Mesa. It is easiest to make this happen with shaders that contain flow-control that could not be lowered. Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i915/i915_fragprog.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 6e1d7092237..32050cebf33 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -303,7 +303,7 @@ do { \ /* * TODO: consider moving this into core */ -static void calc_live_regs( struct i915_fragment_program *p ) +static bool calc_live_regs( struct i915_fragment_program *p ) { const struct gl_fragment_program *program = &p->FragProg; GLuint regsUsed = 0xffff0000; @@ -317,6 +317,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) /* Register is written to: unmark as live for this and preceeding ops */ if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (inst->DstReg.Index > 16) + return false; + live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask; if (live_components[inst->DstReg.Index] == 0) regsUsed &= ~(1 << inst->DstReg.Index); @@ -327,6 +330,9 @@ static void calc_live_regs( 
struct i915_fragment_program *p ) if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) { unsigned c; + if (inst->SrcReg[a].Index > 16) + return false; + regsUsed |= 1 << inst->SrcReg[a].Index; for (c = 0; c < 4; c++) { @@ -340,6 +346,8 @@ static void calc_live_regs( struct i915_fragment_program *p ) p->usedRegs[i] = regsUsed; } + + return true; } static GLuint get_live_regs( struct i915_fragment_program *p, @@ -394,7 +402,10 @@ upload_program(struct i915_fragment_program *p) /* Not always needed: */ - calc_live_regs(p); + if (!calc_live_regs(p)) { + i915_program_error(p, "Could not allocate registers"); + return; + } while (1) { GLuint src0, src1, src2, flags; From 0290a018a50bd4a3180af3233f145f4de7b63706 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 10 Feb 2011 13:20:26 -0800 Subject: [PATCH 190/600] i915: Only emit program errors when INTEL_DEBUG=wm or INTEL_DEBUG=fallbacks This makes piglit a lot more happy. The errors are logged when INTEL_DEBUG=fallbacks because the application is about to hit a big software fallback. We frequently ask people to run applications that are hitting software fallbacks with INTEL_DEBUG=fallbacks so that we can help them debug the reason for the software fallback. Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i915/i915_program.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index ca1949b223e..0a600d30bef 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -442,14 +442,16 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) void i915_program_error(struct i915_fragment_program *p, const char *fmt, ...)
{ - va_list args; + if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) { + va_list args; - fprintf(stderr, "i915_program_error: "); - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); + fprintf(stderr, "i915_program_error: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); - fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + } p->error = 1; } From be7407b75b12c70e1925c10117937ae2b9e6711f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 2 Aug 2011 01:04:58 +0200 Subject: [PATCH 191/600] gallium/util: add functions for manipulating swizzles Some of those have been in drivers already. --- src/gallium/auxiliary/util/u_format.c | 50 +++++++++++++++++++ src/gallium/auxiliary/util/u_format.h | 19 +++++++ src/gallium/drivers/r300/r300_fs.c | 5 +- src/gallium/drivers/r300/r300_state_derived.c | 18 +------ src/gallium/drivers/r300/r300_texture.c | 14 +----- src/gallium/drivers/r300/r300_texture.h | 4 -- src/gallium/drivers/r600/r600_texture.c | 6 +-- 7 files changed, 75 insertions(+), 41 deletions(-) diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c index 9cbdd0a5b99..3a8aeab5fed 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ b/src/gallium/auxiliary/util/u_format.c @@ -390,3 +390,53 @@ util_format_translate(enum pipe_format dst_format, FREE(tmp_row); } } + +void util_format_compose_swizzles(const unsigned char swz1[4], + const unsigned char swz2[4], + unsigned char dst[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ? 
+ swz1[swz2[i]] : swz2[i]; + } +} + +void util_format_swizzle_4f(float *dst, const float *src, + const unsigned char swz[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + if (swz[i] < UTIL_FORMAT_SWIZZLE_W) + dst[i] = src[swz[i]]; + else if (swz[i] == UTIL_FORMAT_SWIZZLE_0) + dst[i] = 0; + else if (swz[i] == UTIL_FORMAT_SWIZZLE_1) + dst[i] = 1; + } +} + +void util_format_unswizzle_4f(float *dst, const float *src, + const unsigned char swz[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + switch (swz[i]) { + case UTIL_FORMAT_SWIZZLE_X: + dst[0] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_Y: + dst[1] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_Z: + dst[2] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_W: + dst[3] = src[i]; + break; + } + } +} diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index bb3ed72e932..566fa79e781 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -815,6 +815,25 @@ util_format_translate(enum pipe_format dst_format, unsigned src_x, unsigned src_y, unsigned width, unsigned height); +/* + * Swizzle operations. + */ + +/* Compose two sets of swizzles. 
+ * If V is a 4D vector and the function parameters represent functions that + * swizzle vector components, this holds: + * swz2(swz1(V)) = dst(V) + */ +void util_format_compose_swizzles(const unsigned char swz1[4], + const unsigned char swz2[4], + unsigned char dst[4]); + +void util_format_swizzle_4f(float *dst, const float *src, + const unsigned char swz[4]); + +void util_format_unswizzle_4f(float *dst, const float *src, + const unsigned char swz[4]); + #ifdef __cplusplus } // extern "C" { #endif diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index a9fd3ad40dd..6f21125f70a 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -180,9 +180,10 @@ static void get_external_state( v->base.format == PIPE_FORMAT_LATC1_SNORM) { unsigned char swizzle[4]; - util_format_combine_swizzles(swizzle, + util_format_compose_swizzles( util_format_description(v->base.format)->swizzle, - v->swizzle); + v->swizzle, + swizzle); state->unit[i].texture_swizzle = RC_MAKE_SWIZZLE(swizzle[0], swizzle[1], diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index f63114e7eb7..45c11fce1fe 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -605,7 +605,6 @@ static uint32_t r300_get_border_color(enum pipe_format format, { const struct util_format_description *desc; float border_swizzled[4] = {0}; - unsigned i; union util_color uc = {0}; desc = util_format_description(format); @@ -629,22 +628,7 @@ static uint32_t r300_get_border_color(enum pipe_format format, } /* Apply inverse swizzle of the format. 
*/ - for (i = 0; i < 4; i++) { - switch (desc->swizzle[i]) { - case UTIL_FORMAT_SWIZZLE_X: - border_swizzled[0] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_Y: - border_swizzled[1] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_Z: - border_swizzled[2] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_W: - border_swizzled[3] = border[i]; - break; - } - } + util_format_unswizzle_4f(border_swizzled, border, desc->swizzle); /* Compressed formats. */ if (util_format_is_compressed(format)) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 08fccbe51c5..fc84004fb97 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -38,18 +38,6 @@ #include "pipe/p_screen.h" -void util_format_combine_swizzles(unsigned char *dst, - const unsigned char *swz1, - const unsigned char *swz2) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ? - swz1[swz2[i]] : swz2[i]; - } -} - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean dxtc_swizzle) @@ -72,7 +60,7 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, if (swizzle_view) { /* Combine two sets of swizzles. 
*/ - util_format_combine_swizzles(swizzle, swizzle_format, swizzle_view); + util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle); } else { memcpy(swizzle, swizzle_format, 4); } diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 4586bb2e4dc..158a387478f 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -35,10 +35,6 @@ struct r300_texture_desc; struct r300_resource; struct r300_screen; -void util_format_combine_swizzles(unsigned char *dst, - const unsigned char *swz1, - const unsigned char *swz2); - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean dxtc_swizzle); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index e9e8b277243..927eb5dafc9 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -754,11 +754,7 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, }; if (swizzle_view) { - /* Combine two sets of swizzles. */ - for (i = 0; i < 4; i++) { - swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? 
- swizzle_format[swizzle_view[i]] : swizzle_view[i]; - } + util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle); } else { memcpy(swizzle, swizzle_format, 4); } From f6df430a85141f6a384c18079fb5b2ad848dac0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Jul 2011 18:45:30 +0200 Subject: [PATCH 192/600] r600g: remove unused code --- src/gallium/drivers/r600/r600.h | 1 - src/gallium/winsys/r600/drm/Makefile | 1 - src/gallium/winsys/r600/drm/SConscript | 1 - src/gallium/winsys/r600/drm/bof.c | 477 ------------------ src/gallium/winsys/r600/drm/bof.h | 90 ---- .../winsys/r600/drm/evergreen_hw_context.c | 1 - src/gallium/winsys/r600/drm/r600_hw_context.c | 83 --- 7 files changed, 654 deletions(-) delete mode 100644 src/gallium/winsys/r600/drm/bof.c delete mode 100644 src/gallium/winsys/r600/drm/bof.h diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 61adc7ed988..d2b03418ede 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -291,7 +291,6 @@ void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r6 void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_flush(struct r600_context *ctx); -void r600_context_dump_bof(struct r600_context *ctx, const char *file); void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw); struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type); diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile index fb7b09b3a0d..1d0de31c65a 100644 --- a/src/gallium/winsys/r600/drm/Makefile +++ b/src/gallium/winsys/r600/drm/Makefile @@ -5,7 +5,6 @@ include $(TOP)/configs/current LIBNAME = r600winsys C_SOURCES = \ - bof.c \ 
evergreen_hw_context.c \ radeon_bo.c \ radeon_pciid.c \ diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript index f55bb265226..efcedc6bff9 100644 --- a/src/gallium/winsys/r600/drm/SConscript +++ b/src/gallium/winsys/r600/drm/SConscript @@ -3,7 +3,6 @@ Import('*') env = env.Clone() r600_sources = [ - 'bof.c', 'evergreen_hw_context.c', 'radeon_bo.c', 'radeon_pciid.c', diff --git a/src/gallium/winsys/r600/drm/bof.c b/src/gallium/winsys/r600/drm/bof.c deleted file mode 100644 index 5c923ad38d6..00000000000 --- a/src/gallium/winsys/r600/drm/bof.c +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Jerome Glisse - */ -#include -#include -#include -#include "bof.h" - -/* - * helpers - */ -static int bof_entry_grow(bof_t *bof) -{ - bof_t **array; - - if (bof->array_size < bof->nentry) - return 0; - array = realloc(bof->array, (bof->nentry + 16) * sizeof(void*)); - if (array == NULL) - return -ENOMEM; - bof->array = array; - bof->nentry += 16; - return 0; -} - -/* - * object - */ -bof_t *bof_object(void) -{ - bof_t *object; - - object = calloc(1, sizeof(bof_t)); - if (object == NULL) - return NULL; - object->refcount = 1; - object->type = BOF_TYPE_OBJECT; - object->size = 12; - return object; -} - -bof_t *bof_object_get(bof_t *object, const char *keyname) -{ - unsigned i; - - for (i = 0; i < object->array_size; i += 2) { - if (!strcmp(object->array[i]->value, keyname)) { - return object->array[i + 1]; - } - } - return NULL; -} - -int bof_object_set(bof_t *object, const char *keyname, bof_t *value) -{ - bof_t *key; - int r; - - if (object->type != BOF_TYPE_OBJECT) - return -EINVAL; - r = bof_entry_grow(object); - if (r) - return r; - key = bof_string(keyname); - if (key == NULL) - return -ENOMEM; - object->array[object->array_size++] = key; - object->array[object->array_size++] = value; - object->size += value->size; - object->size += key->size; - bof_incref(value); - return 0; -} - -/* - * array - */ -bof_t *bof_array(void) -{ - bof_t *array = bof_object(); - - if (array == NULL) - return NULL; - array->type = BOF_TYPE_ARRAY; - array->size = 12; - return array; -} - -int bof_array_append(bof_t *array, bof_t *value) -{ - int r; - if (array->type != BOF_TYPE_ARRAY) - return -EINVAL; - r = bof_entry_grow(array); - if (r) - return r; - array->array[array->array_size++] = value; - array->size += value->size; - bof_incref(value); - return 0; -} - -bof_t *bof_array_get(bof_t *bof, unsigned i) -{ - if (!bof_is_array(bof) || i >= bof->array_size) - return NULL; - return bof->array[i]; -} - -unsigned bof_array_size(bof_t *bof) -{ - if 
(!bof_is_array(bof)) - return 0; - return bof->array_size; -} - -/* - * blob - */ -bof_t *bof_blob(unsigned size, void *value) -{ - bof_t *blob = bof_object(); - - if (blob == NULL) - return NULL; - blob->type = BOF_TYPE_BLOB; - blob->value = calloc(1, size); - if (blob->value == NULL) { - bof_decref(blob); - return NULL; - } - blob->size = size; - memcpy(blob->value, value, size); - blob->size += 12; - return blob; -} - -unsigned bof_blob_size(bof_t *bof) -{ - if (!bof_is_blob(bof)) - return 0; - return bof->size - 12; -} - -void *bof_blob_value(bof_t *bof) -{ - if (!bof_is_blob(bof)) - return NULL; - return bof->value; -} - -/* - * string - */ -bof_t *bof_string(const char *value) -{ - bof_t *string = bof_object(); - - if (string == NULL) - return NULL; - string->type = BOF_TYPE_STRING; - string->size = strlen(value) + 1; - string->value = calloc(1, string->size); - if (string->value == NULL) { - bof_decref(string); - return NULL; - } - strcpy(string->value, value); - string->size += 12; - return string; -} - -/* - * int32 - */ -bof_t *bof_int32(int32_t value) -{ - bof_t *int32 = bof_object(); - - if (int32 == NULL) - return NULL; - int32->type = BOF_TYPE_INT32; - int32->size = 4; - int32->value = calloc(1, int32->size); - if (int32->value == NULL) { - bof_decref(int32); - return NULL; - } - memcpy(int32->value, &value, 4); - int32->size += 12; - return int32; -} - -int32_t bof_int32_value(bof_t *bof) -{ - return *((uint32_t*)bof->value); -} - -/* - * common - */ -static void bof_indent(int level) -{ - int i; - - for (i = 0; i < level; i++) - fprintf(stderr, " "); -} - -static void bof_print_bof(bof_t *bof, int level, int entry) -{ - bof_indent(level); - if (bof == NULL) { - fprintf(stderr, "--NULL-- for entry %d\n", entry); - return; - } - switch (bof->type) { - case BOF_TYPE_STRING: - fprintf(stderr, "%p string [%s %d]\n", bof, (char*)bof->value, bof->size); - break; - case BOF_TYPE_INT32: - fprintf(stderr, "%p int32 [%d %d]\n", bof, *(int*)bof->value, 
bof->size); - break; - case BOF_TYPE_BLOB: - fprintf(stderr, "%p blob [%d]\n", bof, bof->size); - break; - case BOF_TYPE_NULL: - fprintf(stderr, "%p null [%d]\n", bof, bof->size); - break; - case BOF_TYPE_OBJECT: - fprintf(stderr, "%p object [%d %d]\n", bof, bof->array_size / 2, bof->size); - break; - case BOF_TYPE_ARRAY: - fprintf(stderr, "%p array [%d %d]\n", bof, bof->array_size, bof->size); - break; - default: - fprintf(stderr, "%p unknown [%d]\n", bof, bof->type); - return; - } -} - -static void bof_print_rec(bof_t *bof, int level, int entry) -{ - unsigned i; - - bof_print_bof(bof, level, entry); - for (i = 0; i < bof->array_size; i++) { - bof_print_rec(bof->array[i], level + 2, i); - } -} - -void bof_print(bof_t *bof) -{ - bof_print_rec(bof, 0, 0); -} - -static int bof_read(bof_t *root, FILE *file, long end, int level) -{ - bof_t *bof = NULL; - int r; - - if (ftell(file) >= end) { - return 0; - } - r = bof_entry_grow(root); - if (r) - return r; - bof = bof_object(); - if (bof == NULL) - return -ENOMEM; - bof->offset = ftell(file); - r = fread(&bof->type, 4, 1, file); - if (r != 1) - goto out_err; - r = fread(&bof->size, 4, 1, file); - if (r != 1) - goto out_err; - r = fread(&bof->array_size, 4, 1, file); - if (r != 1) - goto out_err; - switch (bof->type) { - case BOF_TYPE_STRING: - case BOF_TYPE_INT32: - case BOF_TYPE_BLOB: - bof->value = calloc(1, bof->size - 12); - if (bof->value == NULL) { - goto out_err; - } - r = fread(bof->value, bof->size - 12, 1, file); - if (r != 1) { - fprintf(stderr, "error reading %d\n", bof->size - 12); - goto out_err; - } - break; - case BOF_TYPE_NULL: - return 0; - case BOF_TYPE_OBJECT: - case BOF_TYPE_ARRAY: - r = bof_read(bof, file, bof->offset + bof->size, level + 2); - if (r) - goto out_err; - break; - default: - fprintf(stderr, "invalid type %d\n", bof->type); - goto out_err; - } - root->array[root->centry++] = bof; - return bof_read(root, file, end, level); -out_err: - bof_decref(bof); - return -EINVAL; -} - -bof_t 
*bof_load_file(const char *filename) -{ - bof_t *root = bof_object(); - int r; - - if (root == NULL) { - fprintf(stderr, "%s failed to create root object\n", __func__); - return NULL; - } - root->file = fopen(filename, "r"); - if (root->file == NULL) - goto out_err; - r = fseek(root->file, 0L, SEEK_SET); - if (r) { - fprintf(stderr, "%s failed to seek into file %s\n", __func__, filename); - goto out_err; - } - root->offset = ftell(root->file); - r = fread(&root->type, 4, 1, root->file); - if (r != 1) - goto out_err; - r = fread(&root->size, 4, 1, root->file); - if (r != 1) - goto out_err; - r = fread(&root->array_size, 4, 1, root->file); - if (r != 1) - goto out_err; - r = bof_read(root, root->file, root->offset + root->size, 2); - if (r) - goto out_err; - return root; -out_err: - bof_decref(root); - return NULL; -} - -void bof_incref(bof_t *bof) -{ - bof->refcount++; -} - -void bof_decref(bof_t *bof) -{ - unsigned i; - - if (bof == NULL) - return; - if (--bof->refcount > 0) - return; - for (i = 0; i < bof->array_size; i++) { - bof_decref(bof->array[i]); - bof->array[i] = NULL; - } - bof->array_size = 0; - if (bof->file) { - fclose(bof->file); - bof->file = NULL; - } - free(bof->array); - free(bof->value); - free(bof); -} - -static int bof_file_write(bof_t *bof, FILE *file) -{ - unsigned i; - int r; - - r = fwrite(&bof->type, 4, 1, file); - if (r != 1) - return -EINVAL; - r = fwrite(&bof->size, 4, 1, file); - if (r != 1) - return -EINVAL; - r = fwrite(&bof->array_size, 4, 1, file); - if (r != 1) - return -EINVAL; - switch (bof->type) { - case BOF_TYPE_NULL: - if (bof->size) - return -EINVAL; - break; - case BOF_TYPE_STRING: - case BOF_TYPE_INT32: - case BOF_TYPE_BLOB: - r = fwrite(bof->value, bof->size - 12, 1, file); - if (r != 1) - return -EINVAL; - break; - case BOF_TYPE_OBJECT: - case BOF_TYPE_ARRAY: - for (i = 0; i < bof->array_size; i++) { - r = bof_file_write(bof->array[i], file); - if (r) - return r; - } - break; - default: - return -EINVAL; - } - return 0; 
-} - -int bof_dump_file(bof_t *bof, const char *filename) -{ - unsigned i; - int r = 0; - - if (bof->file) { - fclose(bof->file); - bof->file = NULL; - } - bof->file = fopen(filename, "w"); - if (bof->file == NULL) { - fprintf(stderr, "%s failed to open file %s\n", __func__, filename); - r = -EINVAL; - goto out_err; - } - r = fseek(bof->file, 0L, SEEK_SET); - if (r) { - fprintf(stderr, "%s failed to seek into file %s\n", __func__, filename); - goto out_err; - } - r = fwrite(&bof->type, 4, 1, bof->file); - if (r != 1) - goto out_err; - r = fwrite(&bof->size, 4, 1, bof->file); - if (r != 1) - goto out_err; - r = fwrite(&bof->array_size, 4, 1, bof->file); - if (r != 1) - goto out_err; - for (i = 0; i < bof->array_size; i++) { - r = bof_file_write(bof->array[i], bof->file); - if (r) - return r; - } -out_err: - fclose(bof->file); - bof->file = NULL; - return r; -} diff --git a/src/gallium/winsys/r600/drm/bof.h b/src/gallium/winsys/r600/drm/bof.h deleted file mode 100644 index 014affb74f1..00000000000 --- a/src/gallium/winsys/r600/drm/bof.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#ifndef BOF_H -#define BOF_H - -#include -#include - -#define BOF_TYPE_STRING 0 -#define BOF_TYPE_NULL 1 -#define BOF_TYPE_BLOB 2 -#define BOF_TYPE_OBJECT 3 -#define BOF_TYPE_ARRAY 4 -#define BOF_TYPE_INT32 5 - -struct bof; - -typedef struct bof { - struct bof **array; - unsigned centry; - unsigned nentry; - unsigned refcount; - FILE *file; - uint32_t type; - uint32_t size; - uint32_t array_size; - void *value; - long offset; -} bof_t; - -extern int bof_file_flush(bof_t *root); -extern bof_t *bof_file_new(const char *filename); -extern int bof_object_dump(bof_t *object, const char *filename); - -/* object */ -extern bof_t *bof_object(void); -extern bof_t *bof_object_get(bof_t *object, const char *keyname); -extern int bof_object_set(bof_t *object, const char *keyname, bof_t *value); -/* array */ -extern bof_t *bof_array(void); -extern int bof_array_append(bof_t *array, bof_t *value); -extern bof_t *bof_array_get(bof_t *bof, unsigned i); -extern unsigned bof_array_size(bof_t *bof); -/* blob */ -extern bof_t *bof_blob(unsigned size, void *value); -extern unsigned bof_blob_size(bof_t *bof); -extern void *bof_blob_value(bof_t *bof); -/* string */ -extern bof_t *bof_string(const char *value); -/* int32 */ -extern bof_t *bof_int32(int32_t value); -extern int32_t bof_int32_value(bof_t *bof); -/* common functions */ -extern void bof_decref(bof_t *bof); -extern void bof_incref(bof_t *bof); -extern bof_t *bof_load_file(const char *filename); -extern int bof_dump_file(bof_t *bof, const char *filename); -extern void bof_print(bof_t *bof); - -static inline int bof_is_object(bof_t *bof){return (bof->type == BOF_TYPE_OBJECT);} -static inline int bof_is_blob(bof_t 
*bof){return (bof->type == BOF_TYPE_BLOB);} -static inline int bof_is_null(bof_t *bof){return (bof->type == BOF_TYPE_NULL);} -static inline int bof_is_int32(bof_t *bof){return (bof->type == BOF_TYPE_INT32);} -static inline int bof_is_array(bof_t *bof){return (bof->type == BOF_TYPE_ARRAY);} -static inline int bof_is_string(bof_t *bof){return (bof->type == BOF_TYPE_STRING);} - -#endif diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 60d2e289396..5729fdd6326 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -32,7 +32,6 @@ #include "r600.h" #include "evergreend.h" #include "radeon_drm.h" -#include "bof.h" #include "pipe/p_compiler.h" #include "util/u_inlines.h" #include "util/u_memory.h" diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 07bd544d1a0..f1150712b23 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -35,7 +35,6 @@ #include "xf86drm.h" #include "radeon_drm.h" #include "r600_priv.h" -#include "bof.h" #include "r600d.h" #define GROUP_FORCE_NEW_BLOCK 0 @@ -1615,88 +1614,6 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], fence_bo); } -void r600_context_dump_bof(struct r600_context *ctx, const char *file) -{ - bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root; - unsigned i; - - root = device_id = bcs = blob = array = bo = size = handle = NULL; - root = bof_object(); - if (root == NULL) - goto out_err; - device_id = bof_int32(ctx->radeon->device); - if (device_id == NULL) - goto out_err; - if (bof_object_set(root, "device_id", device_id)) - goto out_err; - bof_decref(device_id); - device_id = NULL; - /* dump relocs */ - blob = bof_blob(ctx->creloc * 16, ctx->reloc); - if (blob == NULL) 
- goto out_err; - if (bof_object_set(root, "reloc", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump cs */ - blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "pm4", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump bo */ - array = bof_array(); - if (array == NULL) - goto out_err; - for (i = 0; i < ctx->creloc; i++) { - struct radeon_bo *rbo = ctx->bo[i]; - bo = bof_object(); - if (bo == NULL) - goto out_err; - size = bof_int32(rbo->size); - if (size == NULL) - goto out_err; - if (bof_object_set(bo, "size", size)) - goto out_err; - bof_decref(size); - size = NULL; - handle = bof_int32(rbo->handle); - if (handle == NULL) - goto out_err; - if (bof_object_set(bo, "handle", handle)) - goto out_err; - bof_decref(handle); - handle = NULL; - radeon_bo_map(ctx->radeon, rbo); - blob = bof_blob(rbo->size, rbo->data); - radeon_bo_unmap(ctx->radeon, rbo); - if (blob == NULL) - goto out_err; - if (bof_object_set(bo, "data", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - if (bof_array_append(array, bo)) - goto out_err; - bof_decref(bo); - bo = NULL; - } - if (bof_object_set(root, "bo", array)) - goto out_err; - bof_dump_file(root, file); -out_err: - bof_decref(blob); - bof_decref(array); - bof_decref(bo); - bof_decref(size); - bof_decref(handle); - bof_decref(device_id); - bof_decref(root); -} - static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait) { unsigned results_base = query->results_start; From e69dde5233a2fc6ad4c5483d079e1ea3a2123a59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 23 Jul 2011 04:29:59 +0200 Subject: [PATCH 193/600] r600g: remove dummy function r600_bo_offset Always returned 0. 
--- src/gallium/drivers/r600/evergreen_state.c | 20 +++++++++---------- src/gallium/drivers/r600/r600.h | 7 ++----- src/gallium/drivers/r600/r600_state.c | 18 ++++++++--------- src/gallium/drivers/r600/r600_state_common.c | 3 +-- .../winsys/r600/drm/evergreen_hw_context.c | 2 +- src/gallium/winsys/r600/drm/r600_hw_context.c | 12 +++++------ 6 files changed, 29 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index bc6039dd40c..c9eaf94a2ae 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1023,8 +1023,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->val[1] = (S_030004_TEX_HEIGHT(texture->height0 - 1) | S_030004_TEX_DEPTH(texture->depth0 - 1) | S_030004_ARRAY_MODE(array_mode)); - rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8; - rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[2] = tmp->offset[0] >> 8; + rstate->val[3] = tmp->offset[1] >> 8; rstate->val[4] = (word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_030010_ENDIAN_SWAP(endian) | @@ -1354,7 +1354,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, - (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + offset >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_028C78_CB_COLOR0_DIM + cb * 0x3C, 0x0, 0xFFFFFFFF, NULL); @@ -1407,18 +1407,18 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo); 
r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo); if (stencil_format) { uint32_t stencil_offset; stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255; r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, - (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, - (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo); } r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); @@ -2265,7 +2265,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_028844_SQ_PGM_RESOURCES_PS, S_028844_NUM_GPRS(rshader->bc.ngpr) | @@ -2339,7 +2339,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_02885C_SQ_PGM_START_VS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, @@ -2356,7 +2356,7 @@ void evergreen_fetch_shader(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - (r600_bo_offset(ve->fetch_shader)) >> 8, + 0, 0xFFFFFFFF, ve->fetch_shader); } diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index d2b03418ede..2e759c79409 100644 --- a/src/gallium/drivers/r600/r600.h +++ 
b/src/gallium/drivers/r600/r600.h @@ -105,11 +105,8 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo, - unsigned stride, struct winsys_handle *whandle); -static INLINE unsigned r600_bo_offset(struct r600_bo *bo) -{ - return 0; -} + unsigned stride, struct winsys_handle *whandle); + void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); /* this relies on the pipe_reference being the first member of r600_bo */ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 1350a1cf565..487b1df0052 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1077,8 +1077,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) | S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format)); - rstate->val[2] = (tmp->offset[offset_level] + r600_bo_offset(bo[0])) >> 8; - rstate->val[3] = (tmp->offset[offset_level+1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[2] = tmp->offset[offset_level] >> 8; + rstate->val[3] = tmp->offset[offset_level+1] >> 8; rstate->val[4] = (word4 | S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_038010_REQUEST_SIZE(1) | @@ -1441,7 +1441,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, - (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + offset >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_0280A0_CB_COLOR0_INFO + cb * 4, color_info, 0xFFFFFFFF, bo[0]); @@ -1455,10 +1455,10 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta 0x00000000, 0xFFFFFFFF, NULL); 
r600_pipe_state_add_reg(rstate, R_0280E0_CB_COLOR0_FRAG + cb * 4, - r600_bo_offset(bo[1]) >> 8, 0xFFFFFFFF, bo[1]); + 0, 0xFFFFFFFF, bo[1]); r600_pipe_state_add_reg(rstate, R_0280C0_CB_COLOR0_TILE + cb * 4, - r600_bo_offset(bo[2]) >> 8, 0xFFFFFFFF, bo[2]); + 0, 0xFFFFFFFF, bo[2]); r600_pipe_state_add_reg(rstate, R_028100_CB_COLOR0_MASK + cb * 4, 0x00000000, 0xFFFFFFFF, NULL); @@ -1492,7 +1492,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), 0xFFFFFFFF, NULL); @@ -2027,7 +2027,7 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shad r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_028850_SQ_PGM_RESOURCES_PS, S_028868_NUM_GPRS(rshader->bc.ngpr) | @@ -2106,7 +2106,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028858_SQ_PGM_START_VS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, @@ -2127,7 +2127,7 @@ void r600_fetch_shader(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, + 0, 0xFFFFFFFF, ve->fetch_shader); } diff --git 
a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 408eaed491b..9f3ab89fdf7 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -418,7 +418,6 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } r600_upload_const_buffer(rctx, &rbuffer, &offset); - offset += r600_bo_offset(rbuffer->r.bo); switch (shader) { case PIPE_SHADER_VERTEX: @@ -518,7 +517,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } if (vertex_buffer == NULL || rbuffer == NULL) continue; - offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + offset += vertex_buffer->buffer_offset; if (!rstate->id) { if (rctx->chip_class >= EVERGREEN) { diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 5729fdd6326..98283ffbefc 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -1202,7 +1202,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr pm4[3] = draw->vgt_num_instances; if (draw->indices) { pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + pm4[5] = draw->indices_bo_offset; pm4[6] = 0; pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index f1150712b23..35c086ae680 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1468,7 +1468,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) pm4[3] = draw->vgt_num_instances; if (draw->indices) { pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + 
pm4[5] = draw->indices_bo_offset; pm4[6] = 0; pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; @@ -1710,14 +1710,14 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) if (query->type == PIPE_QUERY_TIME_ELAPSED) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end; ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end; ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); @@ -1735,14 +1735,14 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) if (query->type == PIPE_QUERY_TIME_ELAPSED) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8; ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8; ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 
0); @@ -1789,7 +1789,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, /* emit predicate packets for all data blocks */ while (results_base != query->results_end) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); - ctx->pm4[ctx->pm4_cdwords++] = results_base + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = results_base; ctx->pm4[ctx->pm4_cdwords++] = op; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; From 0f1aae3ae7cef051f87dae056c46fcfd0afaab20 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 1 Aug 2011 16:06:59 -0700 Subject: [PATCH 194/600] intel: Fix unused variable warning. --- src/mesa/drivers/dri/intel/intel_fbo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index e48d6ef9cbd..65ad621e770 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -596,7 +596,6 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, struct intel_texture_image *intel_image, int zoffset) { - struct intel_mipmap_tree *mt = intel_image->mt; unsigned int dst_x, dst_y; /* compute offset of the particular 2D image within the texture region */ From 7cf799d47269ce01d3e5981709744a16b7c2756c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Aug 2011 13:36:57 -0700 Subject: [PATCH 195/600] radeon: Remove set-but-unused color_mask variable. This has been around since the initial import in 2003 and never used. 
--- src/mesa/drivers/dri/r200/r200_ioctl.c | 3 --- src/mesa/drivers/dri/radeon/radeon_ioctl.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index 02201cb53d6..44a794da396 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -185,7 +185,6 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) r200ContextPtr rmesa = R200_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if ( R200_DEBUG & RADEON_IOCTL ) { @@ -206,13 +205,11 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index a91d8727792..c23e9c2d2a2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -560,7 +560,6 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) r100ContextPtr rmesa = R100_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { @@ -582,13 +581,11 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } From 
25fffa9364baef76a7e7e875be1fb3c4f10aadfd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Aug 2011 13:39:43 -0700 Subject: [PATCH 196/600] radeon: Remove set-but-unused log2Depth variable. r100 doesn't support 3D textures (GL_EXT_texture3D). --- src/mesa/drivers/dri/radeon/radeon_texstate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 9ba98e303a7..3abaa1504a4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -1018,7 +1018,7 @@ static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit ) static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit) { const struct gl_texture_image *firstImage; - GLint log2Width, log2Height, log2Depth, texelBytes; + GLint log2Width, log2Height, texelBytes; if ( t->bo ) { return GL_TRUE; @@ -1033,7 +1033,6 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int log2Width = firstImage->WidthLog2; log2Height = firstImage->HeightLog2; - log2Depth = firstImage->DepthLog2; texelBytes = _mesa_get_format_bytes(firstImage->TexFormat); if (!t->image_override) { From f5e612ab594689c7736f8af082e88c107bd7582c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Aug 2011 13:41:59 -0700 Subject: [PATCH 197/600] radeon: Remove set-but-unused variables in radeonSetTexBuffer2() variants. These have been unused since 2009. 
--- src/mesa/drivers/dri/r200/r200_texstate.c | 6 ------ src/mesa/drivers/dri/r300/r300_texstate.c | 5 ----- src/mesa/drivers/dri/r600/evergreen_tex.c | 7 ------- src/mesa/drivers/dri/r600/r600_texstate.c | 5 ----- src/mesa/drivers/dri/radeon/radeon_texstate.c | 6 ------ 5 files changed, 29 deletions(-) diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 7adf9ad73ed..8c9bd6d00b2 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -773,18 +773,12 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r200ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index e24ad6f088d..e4388a021ed 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -427,13 +427,8 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 
3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c index 9784a8484f2..d240a216817 100644 --- a/src/mesa/drivers/dri/r600/evergreen_tex.c +++ b/src/mesa/drivers/dri/r600/evergreen_tex.c @@ -1288,19 +1288,12 @@ void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_textur struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - context_t *rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 949db29c189..65fae7195fd 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -1141,13 +1141,8 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 
3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 3abaa1504a4..430309392a0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -648,18 +648,12 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r100ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = _mesa_get_current_tex_unit(radeon->glCtx); From 8de1d42f244f6315c471b01ef52a61f61d227c6d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Aug 2011 13:47:18 -0700 Subject: [PATCH 198/600] radeon: Remove set-but-unused variables in radeon_lock.c These have been unused since this function's introduction in the FBO support development around 2009. 
--- src/mesa/drivers/dri/radeon/radeon_lock.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 7b6bd36dcf7..ae8a212f806 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -114,16 +114,6 @@ void radeon_lock_hardware(radeonContextPtr radeon ) { char ret = 0; - struct radeon_framebuffer *rfb = NULL; - struct radeon_renderbuffer *rrb = NULL; - - if (radeon_get_drawable(radeon)) { - rfb = radeon_get_drawable(radeon)->driverPrivate; - - if (rfb) - rrb = radeon_get_renderbuffer(&rfb->base, - rfb->base._ColorDrawBufferIndexes[0]); - } if (!radeon->radeonScreen->driScreen->dri2.enabled) { if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1) From e0e4c2e30552e524c91b2eb98a2dabdcd4666169 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Aug 2011 13:49:05 -0700 Subject: [PATCH 199/600] radeon: Remove some remaining set-but-unused variables. These looked more like copy-and-paste to me than the others (which looked more like possibly someone forgot to write some code in a refactor), so I didn't verify where they came from. 
--- src/mesa/drivers/dri/r600/r600_cmdbuf.c | 2 -- src/mesa/drivers/dri/radeon/radeon_common.c | 8 -------- src/mesa/drivers/dri/radeon/radeon_common_context.c | 2 -- src/mesa/drivers/dri/radeon/radeon_cs_legacy.c | 2 -- 4 files changed, 14 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index ce2f7779563..74f048b1062 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -259,13 +259,11 @@ static int r600_cs_process_relocs(struct radeon_cs_int *csi, uint32_t * reloc_chunk, uint32_t * length_dw_reloc_chunk) { - struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm; struct r600_cs_reloc_legacy *relocs; int i, j, r; uint32_t offset_dw = 0; - csm = (struct r600_cs_manager_legacy*)csi->csm; relocs = (struct r600_cs_reloc_legacy *)csi->relocs; restart: for (i = 0; i < csi->crelocs; i++) { diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index bfc307ca987..e7a6623cf84 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -436,7 +436,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, const drm_clip_rect_t *rect) { radeonContextPtr rmesa; - struct radeon_framebuffer *rfb; GLint nbox, i, ret; assert(dPriv); @@ -447,8 +446,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, LOCK_HARDWARE(rmesa); - rfb = dPriv->driverPrivate; - if ( RADEON_DEBUG & RADEON_IOCTL ) { fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); } @@ -527,8 +524,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) { radeonContextPtr radeon; GLint ret; - __DRIscreen *psp; - struct radeon_renderbuffer *rrb; struct radeon_framebuffer *rfb; assert(dPriv); @@ -537,9 +532,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; rfb = dPriv->driverPrivate; - rrb = (void 
*)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer; - - psp = dPriv->driScreenPriv; LOCK_HARDWARE(radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index bf8925f61d0..c08b79484af 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -515,7 +515,6 @@ void radeon_prepare_render(radeonContextPtr radeon) __DRIcontext *driContext = radeon->dri.context; __DRIdrawable *drawable; __DRIscreen *screen; - struct radeon_framebuffer *draw; screen = driContext->driScreenPriv; if (!screen->dri2.loader) @@ -527,7 +526,6 @@ void radeon_prepare_render(radeonContextPtr radeon) radeon_update_renderbuffers(driContext, drawable, GL_FALSE); /* Intel driver does the equivalent of this, no clue if it is needed:*/ - draw = drawable->driverPrivate; radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer); driContext->dri2.draw_stamp = drawable->dri2.stamp; diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c index c2722a4e195..5595b705b15 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c @@ -218,11 +218,9 @@ static int cs_end(struct radeon_cs_int *cs, static int cs_process_relocs(struct radeon_cs_int *cs) { - struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; struct cs_reloc_legacy *relocs; int i, j, r; - csm = (struct cs_manager_legacy*)cs->csm; relocs = (struct cs_reloc_legacy *)cs->relocs; restart: for (i = 0; i < cs->crelocs; i++) From b5e39405831092d8cf7943318c92b750325eb31e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 01:13:06 +0200 Subject: [PATCH 200/600] util: fix a typo in util_format_swizzle_4f Reported by Gustaw Smolarczyk. 
--- src/gallium/auxiliary/util/u_format.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c index 3a8aeab5fed..34922ab18ab 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ b/src/gallium/auxiliary/util/u_format.c @@ -409,7 +409,7 @@ void util_format_swizzle_4f(float *dst, const float *src, unsigned i; for (i = 0; i < 4; i++) { - if (swz[i] < UTIL_FORMAT_SWIZZLE_W) + if (swz[i] <= UTIL_FORMAT_SWIZZLE_W) dst[i] = src[swz[i]]; else if (swz[i] == UTIL_FORMAT_SWIZZLE_0) dst[i] = 0; From 2664980760c5cf2e7dde4065f9cc8e8b865627c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 00:52:55 +0200 Subject: [PATCH 201/600] winsys/radeon: remove dummy function pb_buffer --- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 17 +++++++---------- src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 6 ------ 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 796262ccfdb..2eb9d134407 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -89,7 +89,7 @@ static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf) static void radeon_bo_wait(struct pb_buffer *_buf) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct radeon_bo *bo = get_radeon_bo(_buf); struct drm_radeon_gem_wait_idle args = {}; while (p_atomic_read(&bo->num_active_ioctls)) { @@ -105,7 +105,7 @@ static void radeon_bo_wait(struct pb_buffer *_buf) static boolean radeon_bo_is_busy(struct pb_buffer *_buf) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct radeon_bo *bo = get_radeon_bo(_buf); struct drm_radeon_gem_busy args = {}; boolean busy; @@ -395,16 +395,14 @@ static void *radeon_bo_map(struct pb_buffer *buf, struct radeon_winsys_cs *cs, enum pipe_transfer_usage usage) { - struct pb_buffer 
*_buf = pb_buffer(buf); - - return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), cs); + return pb_map(buf, get_pb_usage_from_transfer_flags(usage), cs); } static void radeon_bo_get_tiling(struct pb_buffer *_buf, enum radeon_bo_layout *microtiled, enum radeon_bo_layout *macrotiled) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct radeon_bo *bo = get_radeon_bo(_buf); struct drm_radeon_gem_set_tiling args = {}; args.handle = bo->handle; @@ -429,7 +427,7 @@ static void radeon_bo_set_tiling(struct pb_buffer *_buf, enum radeon_bo_layout macrotiled, uint32_t pitch) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct radeon_bo *bo = get_radeon_bo(_buf); struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct drm_radeon_gem_set_tiling args = {}; @@ -464,8 +462,7 @@ static struct radeon_winsys_cs_handle *radeon_drm_get_cs_handle( struct pb_buffer *_buf) { /* return radeon_bo. */ - return (struct radeon_winsys_cs_handle*) - get_radeon_bo(pb_buffer(_buf)); + return (struct radeon_winsys_cs_handle*)get_radeon_bo(_buf); } static unsigned get_pb_usage_from_create_flags(enum radeon_bo_domain domain) @@ -586,7 +583,7 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer, struct winsys_handle *whandle) { struct drm_gem_flink flink = {}; - struct radeon_bo *bo = get_radeon_bo(pb_buffer(buffer)); + struct radeon_bo *bo = get_radeon_bo(buffer); if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { if (!bo->flinked) { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index b94881bc4ce..f4ea73a2210 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -80,10 +80,4 @@ void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src) pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src); } -static INLINE struct pb_buffer * -pb_buffer(struct pb_buffer *buffer) -{ - return (struct pb_buffer *)buffer; -} - 
#endif From 6eb94fc3444a300a0419c40cfcf356fdd88bc304 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Wed, 3 Aug 2011 01:04:19 +0400 Subject: [PATCH 202/600] r600g: use backend mask for occlusion queries Use backend_map kernel query if supported, otherwise analyze ZPASS_DONE results to get the mask. Fixes lockups with predicated rendering due to incorrect query buffer initialization on some cards. Note: this is a candidate for the 7.11 branch. Signed-off-by: Vadim Girlin Signed-off-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 4 + .../winsys/r600/drm/evergreen_hw_context.c | 2 + src/gallium/winsys/r600/drm/r600_drm.c | 59 ++++++++++++ src/gallium/winsys/r600/drm/r600_hw_context.c | 96 ++++++++++++++++++- src/gallium/winsys/r600/drm/r600_priv.h | 3 + 5 files changed, 160 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 2e759c79409..2ac5ed465c1 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -94,6 +94,8 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); unsigned r600_get_clock_crystal_freq(struct radeon *radeon); unsigned r600_get_minor_version(struct radeon *radeon); unsigned r600_get_num_backends(struct radeon *radeon); +unsigned r600_get_num_tile_pipes(struct radeon *radeon); +unsigned r600_get_backend_map(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; @@ -258,6 +260,7 @@ struct r600_context { u32 *pm4; struct list_head query_list; unsigned num_query_running; + unsigned backend_mask; struct list_head fenced_bo; unsigned max_db; /* for OQ */ unsigned num_dest_buffers; @@ -279,6 +282,7 @@ struct r600_draw { struct r600_bo *indices; }; +void r600_get_backend_mask(struct r600_context *ctx); int r600_context_init(struct r600_context *ctx, struct radeon *radeon); void r600_context_fini(struct r600_context *ctx); void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state); diff --git 
a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 98283ffbefc..7fe2050cd84 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -1018,6 +1018,8 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) LIST_INITHEAD(&ctx->fenced_bo); + r600_get_backend_mask(ctx); + return 0; out_err: r600_context_fini(ctx); diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index ab0afea5bf5..8aa8c3df52a 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -50,6 +50,14 @@ #define RADEON_INFO_NUM_BACKENDS 0xa #endif +#ifndef RADEON_INFO_NUM_TILE_PIPES +#define RADEON_INFO_NUM_TILE_PIPES 0xb +#endif + +#ifndef RADEON_INFO_BACKEND_MAP +#define RADEON_INFO_BACKEND_MAP 0xd +#endif + enum radeon_family r600_get_family(struct radeon *r600) { return r600->family; @@ -75,6 +83,16 @@ unsigned r600_get_num_backends(struct radeon *radeon) return radeon->num_backends; } +unsigned r600_get_num_tile_pipes(struct radeon *radeon) +{ + return radeon->num_tile_pipes; +} + +unsigned r600_get_backend_map(struct radeon *radeon) +{ + return radeon->backend_map; +} + unsigned r600_get_minor_version(struct radeon *radeon) { return radeon->minor_version; @@ -241,6 +259,42 @@ static int radeon_get_num_backends(struct radeon *radeon) return 0; } +static int radeon_get_num_tile_pipes(struct radeon *radeon) +{ + struct drm_radeon_info info = {}; + uint32_t num_tile_pipes = 0; + int r; + + info.request = RADEON_INFO_NUM_TILE_PIPES; + info.value = (uintptr_t)&num_tile_pipes; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + if (r) + return r; + + radeon->num_tile_pipes = num_tile_pipes; + return 0; +} + +static int radeon_get_backend_map(struct radeon *radeon) +{ + struct drm_radeon_info info = {}; + uint32_t backend_map = 0; 
+ int r; + + info.request = RADEON_INFO_BACKEND_MAP; + info.value = (uintptr_t)&backend_map; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + if (r) + return r; + + radeon->backend_map = backend_map; + radeon->backend_map_valid = TRUE; + + return 0; +} + static int radeon_init_fence(struct radeon *radeon) { @@ -362,6 +416,11 @@ static struct radeon *radeon_new(int fd, unsigned device) if (radeon->minor_version >= 9) radeon_get_num_backends(radeon); + if (radeon->minor_version >= 11) { + radeon_get_num_tile_pipes(radeon); + radeon_get_backend_map(radeon); + } + radeon->bomgr = r600_bomgr_create(radeon, 1000000); if (radeon->bomgr == NULL) { return NULL; diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 35c086ae680..30af4e8066f 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -39,6 +39,91 @@ #define GROUP_FORCE_NEW_BLOCK 0 +/* Get backends mask */ +void r600_get_backend_mask(struct r600_context *ctx) +{ + struct r600_bo * buffer; + u32 * results; + unsigned num_backends = r600_get_num_backends(ctx->radeon); + unsigned i, mask = 0; + + /* if backend_map query is supported by the kernel */ + if (ctx->radeon->backend_map_valid) { + unsigned num_tile_pipes = r600_get_num_tile_pipes(ctx->radeon); + unsigned backend_map = r600_get_backend_map(ctx->radeon); + unsigned item_width, item_mask; + + if (ctx->radeon->chip_class >= EVERGREEN) { + item_width = 4; + item_mask = 0x7; + } else { + item_width = 2; + item_mask = 0x3; + } + + while(num_tile_pipes--) { + i = backend_map & item_mask; + mask |= (1<<i); + backend_map >>= item_width; + } + if (mask != 0) { + ctx->backend_mask = mask; + return; + } + } + + /* otherwise backup path for older kernels */ + + /* create buffer for event data */ + buffer = r600_bo(ctx->radeon, ctx->max_db*16, 1, 0, + PIPE_USAGE_STAGING); + if (!buffer) + goto err; + + /* initialize buffer with
zeroes */ + results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_WRITE, NULL); + if (results) { + memset(results, 0, ctx->max_db * 4 * 4); + r600_bo_unmap(ctx->radeon, buffer); + + /* emit EVENT_WRITE for ZPASS_DONE */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = 0; + + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], buffer); + + /* execute */ + r600_context_flush(ctx); + + /* analyze results */ + results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_READ, NULL); + if (results) { + for(i = 0; i < ctx->max_db; i++) { + /* at least highest bit will be set if backend is used */ + if (results[i*4 + 1]) + mask |= (1<<i); + } + r600_bo_unmap(ctx->radeon, buffer); + } + } + + r600_bo_reference(ctx->radeon, &buffer, NULL); + + if (mask != 0) { + ctx->backend_mask = mask; + return; + } + +err: + /* fallback to old method - set num_backends lower bits to 1 */ + ctx->backend_mask = (~((u32)0))>>(32-num_backends); + return; +} + static inline void r600_context_ps_partial_flush(struct r600_context *ctx) { if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) @@ -898,6 +983,8 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) ctx->max_db = 4; + r600_get_backend_mask(ctx); + return 0; out_err: r600_context_fini(ctx); @@ -1652,7 +1739,6 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu void r600_query_begin(struct r600_context *ctx, struct r600_query *query) { unsigned required_space, new_results_end; - int num_backends = r600_get_num_backends(ctx->radeon); /* query request needs 6/8 dwords for begin + 6/8 dwords for end */ if (query->type == PIPE_QUERY_TIME_ELAPSED) @@ -1698,9 +1784,11 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) memset(results, 0,
query->result_size); /* Set top bits for unused backends */ - for (i = num_backends; i < ctx->max_db; i++) { - results[(i * 4)+1] = 0x80000000; - results[(i * 4)+3] = 0x80000000; + for (i = 0; i < ctx->max_db; i++) { + if (!(ctx->backend_mask & (1<<i))) { + results[(i * 4)+1] = 0x80000000; + results[(i * 4)+3] = 0x80000000; + } } r600_bo_unmap(ctx->radeon, query->buffer); } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 69f7251c043..75115fdaed7 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -55,6 +55,9 @@ struct radeon { struct r600_bo *fence_bo; unsigned clock_crystal_freq; unsigned num_backends; + unsigned num_tile_pipes; + unsigned backend_map; + boolean backend_map_valid; unsigned minor_version; /* List of buffer handles and its mutex. */ From 2bde0cc95d8db10b6d2c6689ca39c196a81248b0 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Wed, 3 Aug 2011 15:35:02 +0400 Subject: [PATCH 203/600] r600g: take into account force_add_cf in pops When we have two ENDIFs in a row, we shouldn't modify the pop_count for the same alu clause twice. Fixes https://bugs.freedesktop.org/show_bug.cgi?id=38163 Note: this is a candidate for the 7.11 branch.
Signed-off-by: Alex Deucher --- src/gallium/drivers/r600/r600_shader.c | 37 ++++++++++++++++---------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index fc56656f55d..c55cdd707eb 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -2932,25 +2932,34 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) static int pops(struct r600_shader_ctx *ctx, int pops) { - int alu_pop = 3; - if (ctx->bc->cf_last) { - if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) - alu_pop = 0; - else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) - alu_pop = 1; + unsigned force_pop = ctx->bc->force_add_cf; + + if (!force_pop) { + int alu_pop = 3; + if (ctx->bc->cf_last) { + if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) + alu_pop = 0; + else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) + alu_pop = 1; + } + alu_pop += pops; + if (alu_pop == 1) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else if (alu_pop == 2) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else { + force_pop = 1; + } } - alu_pop += pops; - if (alu_pop == 1) { - ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; - ctx->bc->force_add_cf = 1; - } else if (alu_pop == 2) { - ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; - ctx->bc->force_add_cf = 1; - } else { + + if (force_pop) { r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); ctx->bc->cf_last->pop_count = pops; ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; } + return 0; } From babb26776fadb683be9dacb492efcdc455b176ab Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 03:23:12 +0200 Subject: [PATCH 204/600] r600g: remove more of unused code This is a follow-up to f6df430a85141f6a384c18079fb5b2ad848dac0d. --- src/gallium/drivers/r600/r600_pipe.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 615f0688eb3..5159ba350e4 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -119,22 +119,9 @@ static void r600_flush(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_fence **rfence = (struct r600_fence**)fence; -#if 0 - static int dc = 0; - char dname[256]; -#endif - if (rfence) *rfence = r600_create_fence(rctx); -#if 0 - sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 20) { - r600_context_dump_bof(&rctx->ctx, dname); - R600_ERR("dumped %s\n", dname); - } - dc++; -#endif r600_context_flush(&rctx->ctx); } From eeed782ecb9fa92a958cb650c0a5a536556dc611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?RALOVICH=2C=20Krist=C3=B3f?= Date: Sun, 31 Jul 2011 23:49:43 +0200 Subject: [PATCH 205/600] gbm/dri: avoid crash in dri_screen_create --- src/gbm/backends/dri/gbm_dri.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index 6bb7848d830..9de8cb61162 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -194,6 +194,8 @@ dri_screen_create(struct gbm_dri_device *dri) dri->screen = dri->dri2->createNewScreen(0, dri->base.base.fd, dri->extensions, &dri->driver_configs, dri); + if (dri->screen == NULL) + return -1; extensions = dri->core->getExtensions(dri->screen); if (dri_bind_extensions(dri, dri_core_extensions, extensions) < 0) { From ca6bbfd76960731926c99d0b6257b42344596794 Mon Sep 17 00:00:00 2001 From: Benjamin Franzke Date: Thu, 4 Aug 2011 13:37:42 +0200 Subject: [PATCH 206/600] gbm: 
link gbm_gallium_drm.so against math library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids the following runtime error with EGL on platforms that require linking with libm for nontrivial math functions: failed to load module: /xorg/lib64/gbm/gbm_gallium_drm.so: undefined symbol: powf (Based on Kristóf RALOVICH's patch and Ian's suggestions in http://lists.freedesktop.org/archives/mesa-dev/2011-August/010036.html) --- src/gallium/targets/gbm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile index 3ad3eca1d13..b38782c4070 100644 --- a/src/gallium/targets/gbm/Makefile +++ b/src/gallium/targets/gbm/Makefile @@ -15,7 +15,7 @@ GBM_INCLUDES = \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/include \ -GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) \ +GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) -lm \ $(TOP)/src/gallium/state_trackers/gbm/libgbm.a \ $(TOP)/src/gallium/drivers/identity/libidentity.a \ $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ From 32f4cf38085e4056b8e4a9fc78fea28897a1d05f Mon Sep 17 00:00:00 2001 From: Benjamin Franzke Date: Wed, 29 Jun 2011 08:49:39 +0200 Subject: [PATCH 207/600] egl/gbm: Fix EGL_DEFAULT_DISPLAY --- src/egl/drivers/dri2/egl_dri2.c | 7 ++++++ src/egl/drivers/dri2/egl_dri2.h | 1 + src/egl/drivers/dri2/platform_drm.c | 25 +++++++++++++++++-- .../state_trackers/egl/drm/native_drm.c | 23 +++++++++++++---- .../state_trackers/egl/drm/native_drm.h | 4 +++ 5 files changed, 53 insertions(+), 7 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 0aca929e6aa..9a37ea4bbfc 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -591,6 +591,13 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp) case _EGL_PLATFORM_WAYLAND: wl_display_destroy(dri2_dpy->wl_dpy); break; +#endif +#ifdef HAVE_DRM_PLATFORM + case _EGL_PLATFORM_DRM: +
if (dri2_dpy->own_gbm_device) { + gbm_device_destroy(&dri2_dpy->gbm_dri->base.base); + } + break; #endif default: break; diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 3854200bc69..a7297188af2 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -86,6 +86,7 @@ struct dri2_egl_display #ifdef HAVE_DRM_PLATFORM struct gbm_dri_device *gbm_dri; + int own_gbm_device; #endif char *device_name; diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index 579baf9f9d2..04b10e279ec 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -30,6 +30,10 @@ #include #include #include +#include +#include +#include +#include #include "egl_dri2.h" @@ -90,6 +94,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) { struct dri2_egl_display *dri2_dpy; struct gbm_device *gbm; + int fd = -1; int i; dri2_dpy = malloc(sizeof *dri2_dpy); @@ -100,7 +105,15 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) disp->DriverData = (void *) dri2_dpy; - gbm = (struct gbm_device *) disp->PlatformDisplay; + gbm = disp->PlatformDisplay; + if (gbm == NULL) { + fd = open("/dev/dri/card0", O_RDWR); + dri2_dpy->own_gbm_device = 1; + gbm = gbm_create_device(fd); + if (gbm == NULL) + return EGL_FALSE; + } + if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) { free(dri2_dpy); return EGL_FALSE; @@ -112,7 +125,15 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) return EGL_FALSE; } - dri2_dpy->fd = gbm_device_get_fd(gbm); + if (fd < 0) { + fd = dup(gbm_device_get_fd(gbm)); + if (fd < 0) { + free(dri2_dpy); + return EGL_FALSE; + } + } + + dri2_dpy->fd = fd; dri2_dpy->device_name = dri2_get_device_name_for_fd(dri2_dpy->fd); dri2_dpy->driver_name = dri2_dpy->gbm_dri->base.driver_name; diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c index 47910de8d3c..c013769e57d 100644 --- 
a/src/gallium/state_trackers/egl/drm/native_drm.c +++ b/src/gallium/state_trackers/egl/drm/native_drm.c @@ -134,8 +134,11 @@ drm_display_destroy(struct native_display *ndpy) if (drmdpy->device_name) FREE(drmdpy->device_name); - if (drmdpy->fd >= 0) - close(drmdpy->fd); + if (drmdpy->own_gbm) { + gbm_device_destroy(&drmdpy->gbmdrm->base.base); + if (drmdpy->fd >= 0) + close(drmdpy->fd); + } FREE(drmdpy); } @@ -258,7 +261,7 @@ drm_display_init_screen(struct native_display *ndpy) } static struct native_display * -drm_create_display(struct gbm_gallium_drm_device *gbmdrm, +drm_create_display(struct gbm_gallium_drm_device *gbmdrm, int own_gbm, const struct native_event_handler *event_handler) { struct drm_display *drmdpy; @@ -267,6 +270,8 @@ drm_create_display(struct gbm_gallium_drm_device *gbmdrm, if (!drmdpy) return NULL; + drmdpy->gbmdrm = gbmdrm; + drmdpy->own_gbm = own_gbm; drmdpy->fd = gbmdrm->base.base.fd; drmdpy->device_name = drm_get_device_name(drmdpy->fd); @@ -302,22 +307,30 @@ native_create_display(void *dpy, boolean use_sw) { struct gbm_gallium_drm_device *gbm; int fd; + int own_gbm = 0; gbm = dpy; if (gbm == NULL) { fd = open("/dev/dri/card0", O_RDWR); + /* FIXME: Use an internal constructor to create a gbm + * device with gallium backend directly, without setenv */ + setenv("GBM_BACKEND", "gbm_gallium_drm.so", 1); gbm = gbm_gallium_drm_device(gbm_create_device(fd)); + own_gbm = 1; } if (gbm == NULL) return NULL; if (strcmp(gbm_device_get_backend_name(&gbm->base.base), "drm") != 0 || - gbm->base.type != GBM_DRM_DRIVER_TYPE_GALLIUM) + gbm->base.type != GBM_DRM_DRIVER_TYPE_GALLIUM) { + if (own_gbm) + gbm_device_destroy(&gbm->base.base); return NULL; + } - return drm_create_display(gbm, drm_event_handler); + return drm_create_display(gbm, own_gbm, drm_event_handler); } static const struct native_platform drm_platform = { diff --git a/src/gallium/state_trackers/egl/drm/native_drm.h b/src/gallium/state_trackers/egl/drm/native_drm.h index 675a58a1922..18cebf4e276 
100644 --- a/src/gallium/state_trackers/egl/drm/native_drm.h +++ b/src/gallium/state_trackers/egl/drm/native_drm.h @@ -41,6 +41,8 @@ #include "common/native_wayland_drm_bufmgr_helper.h" #endif +#include "gbm_gallium_drmint.h" + struct drm_config; struct drm_crtc; struct drm_connector; @@ -52,6 +54,8 @@ struct drm_display { const struct native_event_handler *event_handler; + struct gbm_gallium_drm_device *gbmdrm; + int own_gbm; int fd; char *device_name; struct drm_config *config; From 57590e173b6f421b1015190aa3c0011ea55f31d8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 28 Jul 2011 15:26:01 +0200 Subject: [PATCH 208/600] st/mesa: determine Const.MaxSamples in init_extensions v2: Check for non-pow2 sample counts as well. --- src/mesa/state_tracker/st_extensions.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index b5f6d356eb0..8e900934054 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -228,6 +228,7 @@ void st_init_extensions(struct st_context *st) { struct pipe_screen *screen = st->pipe->screen; struct gl_context *ctx = st->ctx; + int i; /* * Extensions that are supported by all Gallium drivers: @@ -605,6 +606,16 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.EXT_packed_float = GL_TRUE; } + /* Maximum sample count. 
*/ + for (i = 16; i > 0; --i) { + if (screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_TEXTURE_2D, i, + PIPE_BIND_RENDER_TARGET)) { + ctx->Const.MaxSamples = i; + break; + } + } + if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE)) { ctx->Extensions.ARB_seamless_cube_map = GL_TRUE; ctx->Extensions.AMD_seamless_cubemap_per_texture = GL_TRUE; From 94822c6d83b7811db2a02bb4416df02ae225ba47 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 3 Aug 2011 15:43:16 +0200 Subject: [PATCH 209/600] gallium: extend resource_resolve to accommodate BlitFramebuffer Resolve via glBlitFramebuffer allows resolving a sub-region of a renderbuffer to a different location in any mipmap level of some other texture, and, with a new extension, even scaling. Therefore, location and size parameters are needed. The mask parameter was added because resolving only depth or only stencil of a combined buffer is possible as well. Full information about the blit operation allows the drivers to take the most efficient path they possibly can. --- src/gallium/docs/source/context.rst | 9 ++++++++- src/gallium/include/pipe/p_context.h | 8 +++----- src/gallium/include/pipe/p_defines.h | 4 ++++ src/gallium/include/pipe/p_state.h | 28 ++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 25a3245066c..3faf801b4b1 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -329,8 +329,15 @@ textured quad blitter.. The source and destination may be the same resource, but overlapping blits are not permitted. ``resource_resolve`` resolves a multisampled resource into a non-multisampled -one. Formats and dimensions must match. This function must be present if a driver +one. Their formats must match. This function must be present if a driver supports multisampling. 
+The region that is to be resolved is described by ``pipe_resolve_info``, which +provides a source and a destination rectangle. +The source rectangle may be vertically flipped, but otherwise the dimensions +of the rectangles must match, unless PIPE_CAP_SCALED_RESOLVE is supported, +in which case scaling and horizontal flipping are allowed as well. +The result of resolving depth/stencil values may be any function of the values at +the sample points, but returning the value of the centermost sample is preferred. The interfaces to these calls are likely to change to make it easier for a driver to batch multiple blits with the same source and diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 3f6d90d1bf4..da3ee87515f 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -49,6 +49,7 @@ struct pipe_index_buffer; struct pipe_query; struct pipe_poly_stipple; struct pipe_rasterizer_state; +struct pipe_resolve_info; struct pipe_resource; struct pipe_sampler_state; struct pipe_sampler_view; @@ -268,13 +269,10 @@ struct pipe_context { /** * Resolve a multisampled resource into a non-multisampled one. - * Source and destination must have the same size and same format. + * Source and destination must be of the same format. 
*/ void (*resource_resolve)(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned dst_layer, - struct pipe_resource *src, - unsigned src_layer); + const struct pipe_resolve_info *info); /*@}*/ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 79b89699566..7ffdf97fdfb 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -99,6 +99,9 @@ enum pipe_error { #define PIPE_MASK_B 0x4 #define PIPE_MASK_A 0x8 #define PIPE_MASK_RGBA 0xf +#define PIPE_MASK_Z 0x10 +#define PIPE_MASK_S 0x20 +#define PIPE_MASK_ZS 0x30 /** @@ -468,6 +471,7 @@ enum pipe_cap { PIPE_CAP_MIXED_COLORBUFFER_FORMATS = 46, PIPE_CAP_SEAMLESS_CUBE_MAP = 47, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE = 48, + PIPE_CAP_SCALED_RESOLVE = 49 }; /* Shader caps not specific to any single stage */ diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index d442c15c02a..840b3ee0e37 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -483,6 +483,34 @@ struct pipe_draw_info }; +/** + * Information to describe a resource_resolve call. 
+ */ +struct pipe_resolve_info +{ + struct { + struct pipe_resource *res; + unsigned level; + unsigned layer; + int x0; /**< always left */ + int y0; /**< always top */ + int x1; /**< determines scale if PIPE_CAP_SCALED_RESOLVE is supported */ + int y1; /**< determines scale if PIPE_CAP_SCALED_RESOLVE is supported */ + } dst; + + struct { + struct pipe_resource *res; + unsigned layer; + int x0; + int y0; + int x1; /**< may be < x0 only if PIPE_CAP_SCALED_RESOLVE is supported */ + int y1; /**< may be < y0 even if PIPE_CAP_SCALED_RESOLVE not supported */ + } src; + + unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */ +}; + + #ifdef __cplusplus } #endif From f253d83bc72e7d26df8cd3a04747b3d46a8543e6 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 3 Aug 2011 16:01:41 +0200 Subject: [PATCH 210/600] st/mesa: implement multisample resolve via BlitFramebuffer --- src/mesa/state_tracker/st_cb_blit.c | 116 ++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 5 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 416be194d11..276d10fb557 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -61,6 +61,81 @@ st_destroy_blit(struct st_context *st) #if FEATURE_EXT_framebuffer_blit +static void +st_BlitFramebuffer_resolve(struct gl_context *ctx, + GLbitfield mask, + struct pipe_resolve_info *info) +{ + const GLbitfield depthStencil = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + + struct st_context *st = st_context(ctx); + + struct st_renderbuffer *srcRb, *dstRb; + + if (mask & GL_COLOR_BUFFER_BIT) { + srcRb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + dstRb = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); + + info->mask = PIPE_MASK_RGBA; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer =
dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + + if (mask & depthStencil) { + struct gl_renderbuffer_attachment *srcDepth, *srcStencil; + struct gl_renderbuffer_attachment *dstDepth, *dstStencil; + + srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH]; + dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH]; + srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL]; + dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL]; + + const boolean combined = + st_is_depth_stencil_combined(srcDepth, srcStencil) && + st_is_depth_stencil_combined(dstDepth, dstStencil); + + if ((mask & GL_DEPTH_BUFFER_BIT) || combined) { + /* resolve depth and, if combined and requested, stencil as well */ + srcRb = st_renderbuffer(srcDepth->Renderbuffer); + dstRb = st_renderbuffer(dstDepth->Renderbuffer); + + info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0; + if (combined && (mask & GL_STENCIL_BUFFER_BIT)) + info->mask |= PIPE_MASK_S; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + + if (mask & GL_STENCIL_BUFFER_BIT) { + /* resolve separate stencil buffer */ + srcRb = st_renderbuffer(srcStencil->Renderbuffer); + dstRb = st_renderbuffer(dstStencil->Renderbuffer); + + info->mask = PIPE_MASK_S; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + } +} + static void st_BlitFramebuffer(struct gl_context *ctx, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, @@ -95,6 +170,42 @@ st_BlitFramebuffer(struct gl_context *ctx, srcY1 = readFB->Height - srcY1; } + /* Disable conditional 
rendering. */ + if (st->render_condition) { + st->pipe->render_condition(st->pipe, NULL, 0); + } + + if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) { + struct pipe_resolve_info info; + + if (dstX0 < dstX1) { + info.dst.x0 = dstX0; + info.dst.x1 = dstX1; + info.src.x0 = srcX0; + info.src.x1 = srcX1; + } else { + info.dst.x0 = dstX1; + info.dst.x1 = dstX0; + info.src.x0 = srcX1; + info.src.x1 = srcX0; + } + if (dstY0 < dstY1) { + info.dst.y0 = dstY0; + info.dst.y1 = dstY1; + info.src.y0 = srcY0; + info.src.y1 = srcY1; + } else { + info.dst.y0 = dstY1; + info.dst.y1 = dstY0; + info.src.y0 = srcY1; + info.src.y1 = srcY0; + } + + st_BlitFramebuffer_resolve(ctx, mask, &info); /* filter doesn't apply */ + + goto done; + } + if (srcY0 > srcY1 && dstY0 > dstY1) { /* Both src and dst are upside down. Swap Y to make it * right-side up to increase odds of using a fast path. @@ -109,11 +220,6 @@ st_BlitFramebuffer(struct gl_context *ctx, dstY1 = tmp; } - /* Disable conditional rendering. 
*/ - if (st->render_condition) { - st->pipe->render_condition(st->pipe, NULL, 0); - } - if (mask & GL_COLOR_BUFFER_BIT) { struct gl_renderbuffer_attachment *srcAtt = &readFB->Attachment[readFB->_ColorReadBufferIndex]; From e9d84dab8817a0a7e463229b9a2820b00a9ce667 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 28 Jul 2011 15:54:53 +0200 Subject: [PATCH 211/600] nv50: implement resource_resolve with custom blit --- src/gallium/drivers/nv50/nv50_context.h | 3 +- src/gallium/drivers/nv50/nv50_formats.c | 4 +- src/gallium/drivers/nv50/nv50_screen.c | 4 + src/gallium/drivers/nv50/nv50_screen.h | 6 + src/gallium/drivers/nv50/nv50_shader_state.c | 11 +- .../drivers/nv50/nv50_state_validate.c | 4 +- src/gallium/drivers/nv50/nv50_surface.c | 536 +++++++++++++++++- src/gallium/drivers/nv50/nv50_vbo.c | 2 +- 8 files changed, 559 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index c1226d5eb26..284db69e312 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -171,7 +171,8 @@ void nv50_validate_derived_rs(struct nv50_context *); extern void nv50_init_state_functions(struct nv50_context *); /* nv50_state_validate.c */ -extern boolean nv50_state_validate(struct nv50_context *); +/* @words: check for space before emitting relocs */ +extern boolean nv50_state_validate(struct nv50_context *, unsigned words); /* nv50_surface.c */ extern void nv50_clear(struct pipe_context *, unsigned buffers, diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c index be43147468a..34502d0a397 100644 --- a/src/gallium/drivers/nv50/nv50_formats.c +++ b/src/gallium/drivers/nv50/nv50_formats.c @@ -116,7 +116,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_Z24_S8_UNORM, - B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, 
UINT, Z24_S8, 0), + B_(C0, C1, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0), SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_Z24_X8_UNORM, @@ -124,7 +124,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8_Z24_UNORM, - B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0), + B_(C1, C0, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0), SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT, diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 4139b85a9ae..07a74cc2b9f 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -91,6 +91,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_SCALED_RESOLVE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return nv50_screen(pscreen)->tesla->grclass >= NVA0_3D; @@ -604,6 +605,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); + if (!nv50_blitctx_create(screen)) + goto fail; + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); return pscreen; diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 64ad209a728..315ca80c0d2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -21,6 +21,8 @@ struct nv50_context; #define NV50_SCREEN_RESIDENT_BO_COUNT 5 +struct nv50_blitctx; + struct nv50_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; @@ -39,6 +41,8 @@ struct nv50_screen { struct nouveau_resource *gp_code_heap; struct nouveau_resource *fp_code_heap; + struct nv50_blitctx *blitctx; + struct { void **entries; int 
next; @@ -71,6 +75,8 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +boolean nv50_blitctx_create(struct nv50_screen *); + void nv50_screen_make_buffers_resident(struct nv50_screen *); int nv50_screen_tic_alloc(struct nv50_screen *, void *); diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index e5b10c37bef..d73f7c7f213 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -130,13 +130,14 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog) int ret; unsigned size; - if (prog->translated) + if (!prog->translated) { + prog->translated = nv50_program_translate(prog); + if (!prog->translated) + return FALSE; + } else + if (prog->res) return TRUE; - prog->translated = nv50_program_translate(prog); - if (!prog->translated) - return FALSE; - if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap; else if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 8b0b08f8e93..44f2d25c1a7 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -350,7 +350,7 @@ static struct state_validate { #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) boolean -nv50_state_validate(struct nv50_context *nv50) +nv50_state_validate(struct nv50_context *nv50, unsigned words) { unsigned i; @@ -367,6 +367,8 @@ nv50_state_validate(struct nv50_context *nv50) nv50->dirty = 0; } + MARK_RING(nv50->screen->base.channel, words, 0); + nv50_bufctx_emit_relocs(nv50); return TRUE; diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index eefbaad6483..1a5077e970b 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ 
-368,7 +368,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ nv50->dirty &= NV50_NEW_FRAMEBUFFER; - if (!nv50_state_validate(nv50)) + if (!nv50_state_validate(nv50, 9 + (fb->nr_cbufs * 2))) return; if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { @@ -405,12 +405,546 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, nv50->dirty = dirty & ~NV50_NEW_FRAMEBUFFER; } + +struct nv50_blitctx +{ + struct nv50_screen *screen; + struct { + struct pipe_framebuffer_state fb; + struct nv50_program *vp; + struct nv50_program *gp; + struct nv50_program *fp; + unsigned num_textures[3]; + unsigned num_samplers[3]; + struct pipe_sampler_view *texture; + struct nv50_tsc_entry *sampler; + unsigned dirty; + unsigned clip_nr; + } saved; + struct nv50_program vp; + struct nv50_program fp; + struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */ + uint32_t fp_offset; + uint16_t color_mask; + uint8_t filter; +}; + +static void +nv50_blitctx_make_vp(struct nv50_blitctx *blit) +{ + static const uint32_t code[] = + { + 0x10000001, /* mov b32 o[0x00] s[0x00] */ /* HPOS.x */ + 0x0423c788, + 0x10000205, /* mov b32 o[0x04] s[0x04] */ /* HPOS.y */ + 0x0423c788, + 0x10000409, /* mov b32 o[0x08] s[0x08] */ /* TEXC.x */ + 0x0423c788, + 0x1000060d, /* mov b32 o[0x0c] s[0x0c] */ /* TEXC.y */ + 0x0423c788, + 0x10000811, /* exit mov b32 o[0x10] s[0x10] */ /* TEXC.z */ + 0x0423c789, + }; + + blit->vp.type = PIPE_SHADER_VERTEX; + blit->vp.translated = TRUE; + blit->vp.code = (uint32_t *)code; /* const_cast */ + blit->vp.code_size = sizeof(code); + blit->vp.max_gpr = 4; + blit->vp.max_out = 5; + blit->vp.out_nr = 2; + blit->vp.out[0].mask = 0x3; + blit->vp.out[0].sn = TGSI_SEMANTIC_POSITION; + blit->vp.out[1].hw = 2; + blit->vp.out[1].mask = 0x7; + blit->vp.out[1].sn = TGSI_SEMANTIC_GENERIC; + blit->vp.vp.attrs[0] = 0x73; + blit->vp.vp.psiz = 0x40; + blit->vp.vp.edgeflag = 0x40; +} + +static void 
+nv50_blitctx_make_fp(struct nv50_blitctx *blit) +{ + static const uint32_t code[] = + { + /* 3 coords RGBA in, RGBA out, also for Z32_FLOAT(_S8X24_USCALED) */ + 0x80000000, /* interp $r0 v[0x0] */ + 0x80010004, /* interp $r1 v[0x4] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,2,3 } $t0 $s0 { $r0,1,2 } */ + 0x0000c785, /* exit */ + + /* 3 coords ZS in, S encoded in R, Z encoded in GBA (8_UNORM) */ + 0x80000000, /* interp $r0 v[0x00] */ + 0x80010004, /* interp $r1 v[0x04] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */ + 0x00000784, + 0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */ + 0x04b7ffff, + 0xa0000201, /* cvt f32 $r0 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt rni s32 $r2 f32 $r2 */ + 0x8c004780, + 0xc0010001, /* mul f32 $r0 $r0 1/0xff */ + 0x03b8080b, + 0xd03f0405, /* and b32 $r1 $r2 0x0000ff */ + 0x0000000f, + 0xd000040d, /* and b32 $r3 $r2 0xff0000 */ + 0x000ff003, + 0xd0000409, /* and b32 $r2 $r2 0x00ff00 */ + 0x00000ff3, + 0xa0000205, /* cvt f32 $r1 s32 $r1 */ + 0x44014780, + 0xa000060d, /* cvt f32 $r3 s32 $r3 */ + 0x44014780, + 0xa0000409, /* cvt f32 $r2 s32 $r2 */ + 0x44014780, + 0xc0010205, /* mul f32 $r1 $r1 1/0x0000ff */ + 0x03b8080b, + 0xc001060d, /* mul f32 $r3 $r3 1/0x00ff00 */ + 0x0338080b, + 0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */ + 0x0378080b, + 0xf0000001, /* exit never nop */ + 0xe0000001, + + /* 3 coords ZS in, Z encoded in RGB, S encoded in A (U8_UNORM) */ + 0x80000000, /* interp $r0 v[0x00] */ + 0x80010004, /* interp $r1 v[0x04] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */ + 0x00000784, + 0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */ + 0x04b7ffff, + 0xa0000281, /* cvt f32 $r3 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt rni s32 $r2 f32 $r2 */ + 0x8c004780, + 0xc001060d, /* mul f32 $r3 $r3 1/0xff */ + 0x03b8080b, + 
0xd03f0401, /* and b32 $r0 $r2 0x0000ff */ + 0x0000000f, + 0xd0000405, /* and b32 $r1 $r2 0x00ff00 */ + 0x00000ff3, + 0xd0000409, /* and b32 $r2 $r2 0xff0000 */ + 0x000ff003, + 0xa0000001, /* cvt f32 $r0 s32 $r0 */ + 0x44014780, + 0xa0000205, /* cvt f32 $r1 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt f32 $r2 s32 $r2 */ + 0x44014780, + 0xc0010001, /* mul f32 $r0 $r0 1/0x0000ff */ + 0x03b8080b, + 0xc0010205, /* mul f32 $r1 $r1 1/0x00ff00 */ + 0x0378080b, + 0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */ + 0x0338080b, + 0xf0000001, /* exit never nop */ + 0xe0000001 + }; + + blit->fp.type = PIPE_SHADER_FRAGMENT; + blit->fp.translated = TRUE; + blit->fp.code = (uint32_t *)code; /* const_cast */ + blit->fp.code_size = sizeof(code); + blit->fp.max_gpr = 4; + blit->fp.max_out = 4; + blit->fp.in_nr = 1; + blit->fp.in[0].mask = 0x7; /* last component flat */ + blit->fp.in[0].linear = 1; + blit->fp.in[0].sn = TGSI_SEMANTIC_GENERIC; + blit->fp.out_nr = 1; + blit->fp.out[0].mask = 0xf; + blit->fp.out[0].sn = TGSI_SEMANTIC_COLOR; + blit->fp.fp.interp = 0x00020403; + blit->fp.gp.primid = 0x80; +} + +static void +nv50_blitctx_make_sampler(struct nv50_blitctx *blit) +{ + /* clamp to edge, min/max lod = 0, nearest filtering */ + + blit->sampler[0].id = -1; + + blit->sampler[0].tsc[0] = 0x00000092; + blit->sampler[0].tsc[1] = 0x00000051; + + /* clamp to edge, min/max lod = 0, bilinear filtering */ + + blit->sampler[1].id = -1; + + blit->sampler[1].tsc[0] = 0x00000092; + blit->sampler[1].tsc[1] = 0x00000062; +} + +/* Since shaders cannot export stencil, we cannot copy stencil values when + * rendering to ZETA, so we attach the ZS surface to a colour render target. 
+ */ +static INLINE enum pipe_format +nv50_blit_zeta_to_colour_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: return PIPE_FORMAT_R16_UNORM; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: return PIPE_FORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_Z32_FLOAT: return PIPE_FORMAT_R32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: return PIPE_FORMAT_R32G32_FLOAT; + default: + assert(0); + return PIPE_FORMAT_NONE; + } +} + +static void +nv50_blitctx_get_color_mask_and_fp(struct nv50_blitctx *blit, + enum pipe_format format, uint8_t mask) +{ + blit->color_mask = 0; + + switch (format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + blit->fp_offset = 160; + if (mask & PIPE_MASK_Z) + blit->color_mask |= 0x0111; + if (mask & PIPE_MASK_S) + blit->color_mask |= 0x1000; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + blit->fp_offset = 24; + if (mask & PIPE_MASK_Z) + blit->color_mask |= 0x1110; + if (mask & PIPE_MASK_S) + blit->color_mask |= 0x0001; + break; + default: + blit->fp_offset = 0; + if (mask & (PIPE_MASK_R | PIPE_MASK_Z)) blit->color_mask |= 0x0001; + if (mask & (PIPE_MASK_G | PIPE_MASK_S)) blit->color_mask |= 0x0010; + if (mask & PIPE_MASK_B) blit->color_mask |= 0x0100; + if (mask & PIPE_MASK_A) blit->color_mask |= 0x1000; + break; + } +} + +static void +nv50_blit_set_dst(struct nv50_context *nv50, + struct pipe_resource *res, unsigned level, unsigned layer) +{ + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_surface templ; + + if (util_format_is_depth_or_stencil(res->format)) + templ.format = nv50_blit_zeta_to_colour_format(res->format); + else + templ.format = res->format; + + templ.usage = PIPE_USAGE_STREAM; + templ.u.tex.level = level; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + + nv50->framebuffer.cbufs[0] = nv50_miptree_surface_new(pipe, res, &templ); + nv50->framebuffer.nr_cbufs = 1; + 
nv50->framebuffer.zsbuf = NULL; + nv50->framebuffer.width = nv50->framebuffer.cbufs[0]->width; + nv50->framebuffer.height = nv50->framebuffer.cbufs[0]->height; +} + +static INLINE void +nv50_blit_fixup_tic_entry(struct pipe_sampler_view *view) +{ + struct nv50_tic_entry *ent = nv50_tic_entry(view); + + ent->tic[2] &= ~(1 << 31); /* scaled coordinates, ok with 3d textures ? */ + + /* magic: */ + + ent->tic[3] = 0x20000000; /* affects quality of near vertical edges in MS8 */ +} + +static void +nv50_blit_set_src(struct nv50_context *nv50, + struct pipe_resource *res, unsigned level, unsigned layer) +{ + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_sampler_view templ; + + templ.format = res->format; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + templ.u.tex.first_level = templ.u.tex.last_level = level; + templ.swizzle_r = PIPE_SWIZZLE_RED; + templ.swizzle_g = PIPE_SWIZZLE_GREEN; + templ.swizzle_b = PIPE_SWIZZLE_BLUE; + templ.swizzle_a = PIPE_SWIZZLE_ALPHA; + + nv50->textures[2][0] = nv50_create_sampler_view(pipe, res, &templ); + + nv50_blit_fixup_tic_entry(nv50->textures[2][0]); + + nv50->num_textures[0] = nv50->num_textures[1] = 0; + nv50->num_textures[2] = 1; +} + +static void +nv50_blitctx_prepare_state(struct nv50_blitctx *blit) +{ + struct nouveau_channel *chan = blit->screen->base.channel; + + /* blend state */ + BEGIN_RING(chan, RING_3D(COLOR_MASK(0)), 1); + OUT_RING (chan, blit->color_mask); + BEGIN_RING(chan, RING_3D(BLEND_ENABLE(0)), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(LOGIC_OP_ENABLE), 1); + OUT_RING (chan, 0); + + /* rasterizer state */ +#ifndef NV50_SCISSORS_CLIPPING + BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 1); + OUT_RING (chan, 1); +#endif + BEGIN_RING(chan, RING_3D(VERTEX_TWO_SIDE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MSAA_MASK(0)), 
4); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + BEGIN_RING(chan, RING_3D(POLYGON_MODE_FRONT), 3); + OUT_RING (chan, NV50_3D_POLYGON_MODE_FRONT_FILL); + OUT_RING (chan, NV50_3D_POLYGON_MODE_BACK_FILL); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(CULL_FACE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(POLYGON_OFFSET_FILL_ENABLE), 1); + OUT_RING (chan, 0); + + /* zsa state */ + BEGIN_RING(chan, RING_3D(DEPTH_TEST_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(STENCIL_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(ALPHA_TEST_ENABLE), 1); + OUT_RING (chan, 0); +} + +static void +nv50_blitctx_pre_blit(struct nv50_blitctx *blit, struct nv50_context *nv50) +{ + int s; + + blit->saved.fb.width = nv50->framebuffer.width; + blit->saved.fb.height = nv50->framebuffer.height; + blit->saved.fb.nr_cbufs = nv50->framebuffer.nr_cbufs; + blit->saved.fb.cbufs[0] = nv50->framebuffer.cbufs[0]; + blit->saved.fb.zsbuf = nv50->framebuffer.zsbuf; + + blit->saved.vp = nv50->vertprog; + blit->saved.gp = nv50->gmtyprog; + blit->saved.fp = nv50->fragprog; + + nv50->vertprog = &blit->vp; + nv50->gmtyprog = NULL; + nv50->fragprog = &blit->fp; + + blit->saved.clip_nr = nv50->clip.nr; + + nv50->clip.nr = 0; + + for (s = 0; s < 3; ++s) { + blit->saved.num_textures[s] = nv50->num_textures[s]; + blit->saved.num_samplers[s] = nv50->num_samplers[s]; + } + blit->saved.texture = nv50->textures[2][0]; + blit->saved.sampler = nv50->samplers[2][0]; + + nv50->samplers[2][0] = &blit->sampler[blit->filter]; + + nv50->num_samplers[0] = nv50->num_samplers[1] = 0; + nv50->num_samplers[2] = 1; + + blit->saved.dirty = nv50->dirty; + + nv50->dirty = + NV50_NEW_FRAMEBUFFER | + NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS; +} + +static void +nv50_blitctx_post_blit(struct 
nv50_context *nv50, struct nv50_blitctx *blit) +{ + int s; + + pipe_surface_reference(&nv50->framebuffer.cbufs[0], NULL); + + nv50->framebuffer.width = blit->saved.fb.width; + nv50->framebuffer.height = blit->saved.fb.height; + nv50->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs; + nv50->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0]; + nv50->framebuffer.zsbuf = blit->saved.fb.zsbuf; + + nv50->vertprog = blit->saved.vp; + nv50->gmtyprog = blit->saved.gp; + nv50->fragprog = blit->saved.fp; + + nv50->clip.nr = blit->saved.clip_nr; + + pipe_sampler_view_reference(&nv50->textures[2][0], NULL); + + for (s = 0; s < 3; ++s) { + nv50->num_textures[s] = blit->saved.num_textures[s]; + nv50->num_samplers[s] = blit->saved.num_samplers[s]; + } + nv50->textures[2][0] = blit->saved.texture; + nv50->samplers[2][0] = blit->saved.sampler; + + nv50->dirty = blit->saved.dirty | + (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK | + NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG); +} + +static void +nv50_resource_resolve(struct pipe_context *pipe, + const struct pipe_resolve_info *info) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_screen *screen = nv50->screen; + struct nv50_blitctx *blit = screen->blitctx; + struct nouveau_channel *chan = screen->base.channel; + struct pipe_resource *src = info->src.res; + struct pipe_resource *dst = info->dst.res; + float x0, x1, y0, y1, z; + float x_range, y_range; + + nv50_blitctx_get_color_mask_and_fp(blit, dst->format, info->mask); + + blit->filter = util_format_is_depth_or_stencil(dst->format) ? 
0 : 1; + + nv50_blitctx_pre_blit(blit, nv50); + + nv50_blit_set_dst(nv50, dst, info->dst.level, info->dst.layer); + nv50_blit_set_src(nv50, src, 0, info->src.layer); + + nv50_blitctx_prepare_state(blit); + + nv50_state_validate(nv50, 36); + + x_range = + (float)(info->src.x1 - info->src.x0) / + (float)(info->dst.x1 - info->dst.x0); + y_range = + (float)(info->src.y1 - info->src.y0) / + (float)(info->dst.y1 - info->dst.y0); + + x0 = (float)info->src.x0 - x_range * (float)info->dst.x0; + y0 = (float)info->src.y0 - y_range * (float)info->dst.y0; + + x1 = x0 + 16384.0f * x_range; + y1 = y0 + 16384.0f * y_range; + + x0 *= (float)(1 << nv50_miptree(src)->ms_x); + x1 *= (float)(1 << nv50_miptree(src)->ms_x); + y0 *= (float)(1 << nv50_miptree(src)->ms_y); + y1 *= (float)(1 << nv50_miptree(src)->ms_y); + + z = (float)info->src.layer; + + BEGIN_RING(chan, RING_3D(FP_START_ID), 1); + OUT_RING (chan, + blit->fp.code_base + blit->fp_offset); + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 0); + + /* Draw a large triangle in screen coordinates covering the whole + * render target, with scissors defining the destination region. + * The vertex is supplied with non-normalized texture coordinates + * arranged in a way to yield the desired offset and scale. 
+ */ + + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + OUT_RING (chan, (info->dst.x1 << 16) | info->dst.x0); + OUT_RING (chan, (info->dst.y1 << 16) | info->dst.y0); + + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x0); + OUT_RINGf (chan, y0); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 0.0f); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x1); + OUT_RINGf (chan, y0); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_x); + OUT_RINGf (chan, 0.0f); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x0); + OUT_RINGf (chan, y1); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_y); + BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1); + OUT_RING (chan, 0); + + /* re-enable normally constant state */ + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 1); + + nv50_blitctx_post_blit(nv50, blit); +} + +boolean +nv50_blitctx_create(struct nv50_screen *screen) +{ + screen->blitctx = CALLOC_STRUCT(nv50_blitctx); + if (!screen->blitctx) { + NOUVEAU_ERR("failed to allocate blit context\n"); + return FALSE; + } + + screen->blitctx->screen = screen; + + nv50_blitctx_make_vp(screen->blitctx); + nv50_blitctx_make_fp(screen->blitctx); + + nv50_blitctx_make_sampler(screen->blitctx); + + screen->blitctx->color_mask = 0x1111; + + return TRUE; +} + void nv50_init_surface_functions(struct nv50_context *nv50) { struct pipe_context *pipe = &nv50->base.pipe; pipe->resource_copy_region = nv50_resource_copy_region; + pipe->resource_resolve = nv50_resource_resolve; pipe->clear_render_target = nv50_clear_render_target; pipe->clear_depth_stencil = 
nv50_clear_depth_stencil; } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f23008ae4cf..1c8347a793a 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -647,7 +647,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS))) nv50_update_user_vbufs(nv50); - nv50_state_validate(nv50); + nv50_state_validate(nv50, 8); /* 8 as minimum, we use flush_notify here */ chan->flush_notify = nv50_draw_vbo_flush_notify; From 88a4f2fe543d7c394c0ad732ae60f8cf94c0d357 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:30 -0600 Subject: [PATCH 212/600] mesa: make error handling in glGetTexParameter() a bit more concise --- src/mesa/main/texparam.c | 270 ++++++++++++++------------------------- 1 file changed, 98 insertions(+), 172 deletions(-) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 134f15346e8..78dcc5dccea 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -994,28 +994,21 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, *params = 0; break; case GL_TEXTURE_DEPTH_SIZE_ARB: - if (ctx->Extensions.ARB_depth_texture) - *params = _mesa_get_format_bits(texFormat, pname); - else + if (!ctx->Extensions.ARB_depth_texture) goto invalid_pname; + *params = _mesa_get_format_bits(texFormat, pname); break; case GL_TEXTURE_STENCIL_SIZE_EXT: - if (ctx->Extensions.EXT_packed_depth_stencil || - ctx->Extensions.ARB_framebuffer_object) { - *params = _mesa_get_format_bits(texFormat, pname); - } - else { + if (!ctx->Extensions.EXT_packed_depth_stencil && + !ctx->Extensions.ARB_framebuffer_object) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, pname); break; case GL_TEXTURE_SHARED_SIZE: - if (ctx->VersionMajor >= 3 || - ctx->Extensions.EXT_texture_shared_exponent) { - *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 
5 : 0; - } - else { + if (ctx->VersionMajor < 3 && + !ctx->Extensions.EXT_texture_shared_exponent) goto invalid_pname; - } + *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0; break; /* GL_ARB_texture_compression */ @@ -1036,67 +1029,46 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, /* GL_ARB_texture_float */ case GL_TEXTURE_RED_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_GREEN_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_BLUE_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_ALPHA_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ? 
+ _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_LUMINANCE_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_INTENSITY_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_DEPTH_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ? 
+ _mesa_get_format_datatype(texFormat) : GL_NONE; break; default: @@ -1118,7 +1090,6 @@ void GLAPIENTRY _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) { struct gl_texture_object *obj; - GLboolean error = GL_FALSE; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -1187,49 +1158,37 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) *params = (GLfloat) obj->MaxLevel; break; case GL_TEXTURE_MAX_ANISOTROPY_EXT: - if (ctx->Extensions.EXT_texture_filter_anisotropic) { - *params = obj->Sampler.MaxAnisotropy; - } - else - error = GL_TRUE; + if (!ctx->Extensions.EXT_texture_filter_anisotropic) + goto invalid_pname; + *params = obj->Sampler.MaxAnisotropy; break; case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB: - if (ctx->Extensions.ARB_shadow_ambient) { - *params = obj->Sampler.CompareFailValue; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow_ambient) + goto invalid_pname; + *params = obj->Sampler.CompareFailValue; break; case GL_GENERATE_MIPMAP_SGIS: *params = (GLfloat) obj->GenerateMipmap; break; case GL_TEXTURE_COMPARE_MODE_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLfloat) obj->Sampler.CompareMode; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CompareMode; break; case GL_TEXTURE_COMPARE_FUNC_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLfloat) obj->Sampler.CompareFunc; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CompareFunc; break; case GL_DEPTH_TEXTURE_MODE_ARB: - if (ctx->Extensions.ARB_depth_texture) { - *params = (GLfloat) obj->Sampler.DepthMode; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_depth_texture) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.DepthMode; break; case GL_TEXTURE_LOD_BIAS: - if (ctx->Extensions.EXT_texture_lod_bias) { - *params = obj->Sampler.LodBias; - } - else - error = GL_TRUE; + if 
(!ctx->Extensions.EXT_texture_lod_bias) + goto invalid_pname; + *params = obj->Sampler.LodBias; break; #if FEATURE_OES_draw_texture case GL_TEXTURE_CROP_RECT_OES: @@ -1244,46 +1203,40 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) case GL_TEXTURE_SWIZZLE_G_EXT: case GL_TEXTURE_SWIZZLE_B_EXT: case GL_TEXTURE_SWIZZLE_A_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT; - *params = (GLfloat) obj->Swizzle[comp]; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + *params = (GLfloat) obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT]; break; case GL_TEXTURE_SWIZZLE_RGBA_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { + if (!ctx->Extensions.EXT_texture_swizzle) { + goto invalid_pname; + } + else { GLuint comp; for (comp = 0; comp < 4; comp++) { params[comp] = (GLfloat) obj->Swizzle[comp]; } } - else { - error = GL_TRUE; - } break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { - *params = (GLfloat) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.AMD_seamless_cubemap_per_texture) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CubeMapSeamless; break; default: - error = GL_TRUE; - break; + goto invalid_pname; } - if (error) - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", - pname); - + /* no error if we get here */ _mesa_unlock_texture(ctx, obj); + return; + +invalid_pname: + _mesa_unlock_texture(ctx, obj); + _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname); } @@ -1291,7 +1244,6 @@ void GLAPIENTRY _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) { struct gl_texture_object *obj; - GLboolean error = GL_FALSE; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -1355,55 +1307,37 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) *params = obj->MaxLevel; 
break;; case GL_TEXTURE_MAX_ANISOTROPY_EXT: - if (ctx->Extensions.EXT_texture_filter_anisotropic) { - *params = (GLint) obj->Sampler.MaxAnisotropy; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_filter_anisotropic) + goto invalid_pname; + *params = (GLint) obj->Sampler.MaxAnisotropy; break; case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB: - if (ctx->Extensions.ARB_shadow_ambient) { - *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue); - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow_ambient) + goto invalid_pname; + *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue); break; case GL_GENERATE_MIPMAP_SGIS: *params = (GLint) obj->GenerateMipmap; break; case GL_TEXTURE_COMPARE_MODE_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLint) obj->Sampler.CompareMode; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLint) obj->Sampler.CompareMode; break; case GL_TEXTURE_COMPARE_FUNC_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLint) obj->Sampler.CompareFunc; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLint) obj->Sampler.CompareFunc; break; case GL_DEPTH_TEXTURE_MODE_ARB: - if (ctx->Extensions.ARB_depth_texture) { - *params = (GLint) obj->Sampler.DepthMode; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_depth_texture) + goto invalid_pname; + *params = (GLint) obj->Sampler.DepthMode; break; case GL_TEXTURE_LOD_BIAS: - if (ctx->Extensions.EXT_texture_lod_bias) { - *params = (GLint) obj->Sampler.LodBias; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_lod_bias) + goto invalid_pname; + *params = (GLint) obj->Sampler.LodBias; break; #if FEATURE_OES_draw_texture case GL_TEXTURE_CROP_RECT_OES: @@ -1417,42 +1351,34 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) case GL_TEXTURE_SWIZZLE_G_EXT: case GL_TEXTURE_SWIZZLE_B_EXT: case 
GL_TEXTURE_SWIZZLE_A_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT; - *params = obj->Swizzle[comp]; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + *params = obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT]; break; case GL_TEXTURE_SWIZZLE_RGBA_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - COPY_4V(params, obj->Swizzle); - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + COPY_4V(params, obj->Swizzle); break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { - *params = (GLint) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.AMD_seamless_cubemap_per_texture) + goto invalid_pname; + *params = (GLint) obj->Sampler.CubeMapSeamless; break; default: - ; /* silence warnings */ + goto invalid_pname; } - if (error) - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", - pname); - + /* no error if we get here */ _mesa_unlock_texture(ctx, obj); + return; + +invalid_pname: + _mesa_unlock_texture(ctx, obj); + _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname); } From 1254a2b2e45c6961a57d9c60f561907183ef7de7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: [PATCH 213/600] mesa: condense GL_TEXTURE_RESIDENT query code --- src/mesa/main/texparam.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 78dcc5dccea..0dec0172989 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1133,14 +1133,8 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) } break; case GL_TEXTURE_RESIDENT: - { - GLboolean resident; - if (ctx->Driver.IsTextureResident) - resident = ctx->Driver.IsTextureResident(ctx, obj); - else - resident = GL_TRUE; - 
*params = ENUM_TO_FLOAT(resident); - } + *params = ctx->Driver.IsTextureResident ? + ctx->Driver.IsTextureResident(ctx, obj) : 1.0F; break; case GL_TEXTURE_PRIORITY: *params = obj->Priority; @@ -1282,14 +1276,8 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) } break;; case GL_TEXTURE_RESIDENT: - { - GLboolean resident; - if (ctx->Driver.IsTextureResident) - resident = ctx->Driver.IsTextureResident(ctx, obj); - else - resident = GL_TRUE; - *params = (GLint) resident; - } + *params = ctx->Driver.IsTextureResident ? + ctx->Driver.IsTextureResident(ctx, obj) : 1; break;; case GL_TEXTURE_PRIORITY: *params = FLOAT_TO_INT(obj->Priority); From 02d81dfcaf073b5f7073d405e931b3d3e9f577ef Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: [PATCH 214/600] mesa: add null ptr checks in GetTexParameterI[u]iv() functions --- src/mesa/main/texparam.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 0dec0172989..97d0359f170 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1379,6 +1379,8 @@ _mesa_GetTexParameterIiv(GLenum target, GLenum pname, GLint *params) ASSERT_OUTSIDE_BEGIN_END(ctx); texObj = get_texobj(ctx, target, GL_TRUE); + if (!texObj) + return; switch (pname) { case GL_TEXTURE_BORDER_COLOR: @@ -1399,6 +1401,8 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params) ASSERT_OUTSIDE_BEGIN_END(ctx); texObj = get_texobj(ctx, target, GL_TRUE); + if (!texObj) + return; switch (pname) { case GL_TEXTURE_BORDER_COLOR: From 1e89a526c6cd21852b440904711c5ee733ce1ad2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: [PATCH 215/600] mesa: whitespace, formatting fixes in GetTexParameter() code --- src/mesa/main/texparam.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 97d0359f170..bbbb306b2d9 100644 
--- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1115,17 +1115,15 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) *params = ENUM_TO_FLOAT(obj->Sampler.WrapR); break; case GL_TEXTURE_BORDER_COLOR: - if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) + if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) _mesa_update_state_locked(ctx); - if(ctx->Color._ClampFragmentColor) - { + if (ctx->Color._ClampFragmentColor) { params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F); params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F); params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F); params[3] = CLAMP(obj->Sampler.BorderColor.f[3], 0.0F, 1.0F); } - else - { + else { params[0] = obj->Sampler.BorderColor.f[0]; params[1] = obj->Sampler.BorderColor.f[1]; params[2] = obj->Sampler.BorderColor.f[2]; @@ -1241,9 +1239,9 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - obj = get_texobj(ctx, target, GL_TRUE); - if (!obj) - return; + obj = get_texobj(ctx, target, GL_TRUE); + if (!obj) + return; _mesa_lock_texture(ctx, obj); switch (pname) { From 192baaac0fc4701e82dcc3e19b3033f81dd82a62 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: [PATCH 216/600] mesa: minor comment updates in enable.c --- src/mesa/main/enable.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index aac8b9c5eaf..3ba4df6342f 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -5,7 +5,6 @@ /* * Mesa 3-D graphics library - * Version: 7.0.3 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 
* @@ -560,7 +559,6 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) ctx->Polygon.OffsetLine = state; break; case GL_POLYGON_OFFSET_FILL: - /*case GL_POLYGON_OFFSET_EXT:*/ if (ctx->Polygon.OffsetFill == state) return; FLUSH_VERTICES(ctx, _NEW_POLYGON); @@ -643,9 +641,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) break; #endif - /* - * CLIENT STATE!!! - */ + /* client-side state */ case GL_VERTEX_ARRAY: case GL_NORMAL_ARRAY: case GL_COLOR_ARRAY: @@ -1174,7 +1170,6 @@ _mesa_IsEnabled( GLenum cap ) case GL_POLYGON_OFFSET_LINE: return ctx->Polygon.OffsetLine; case GL_POLYGON_OFFSET_FILL: - /*case GL_POLYGON_OFFSET_EXT:*/ return ctx->Polygon.OffsetFill; case GL_RESCALE_NORMAL_EXT: return ctx->Transform.RescaleNormals; @@ -1213,9 +1208,7 @@ _mesa_IsEnabled( GLenum cap ) } #endif - /* - * CLIENT STATE!!! - */ + /* client-side state */ case GL_VERTEX_ARRAY: return (ctx->Array.ArrayObj->Vertex.Enabled != 0); case GL_NORMAL_ARRAY: From 09ba2527e885f6134002205716a44d01d83638c3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: [PATCH 217/600] st/mesa: move declaration before code --- src/mesa/state_tracker/st_cb_blit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 276d10fb557..626db12431d 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -90,13 +90,14 @@ st_BlitFramebuffer_resolve(struct gl_context *ctx, if (mask & depthStencil) { struct gl_renderbuffer_attachment *srcDepth, *srcStencil; struct gl_renderbuffer_attachment *dstDepth, *dstStencil; + boolean combined; srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH]; dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH]; srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL]; dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL]; - const boolean combined = + combined = 
st_is_depth_stencil_combined(srcDepth, srcStencil) && st_is_depth_stencil_combined(dstDepth, dstStencil); From 50073563b2bfe3716b3dc8b1ed2f91381ba24305 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: [PATCH 218/600] st/mesa: silence int/float and double/float conversion warnings --- src/mesa/state_tracker/st_cb_bitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 49b196032b9..067403f396b 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -328,8 +328,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, if(!normalized) { - sRight = width; - tBot = height; + sRight = (GLfloat) width; + tBot = (GLfloat) height; } /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as @@ -381,7 +381,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, /* same for all verts: */ for (i = 0; i < 4; i++) { st->bitmap.vertices[i][0][2] = z; - st->bitmap.vertices[i][0][3] = 1.0; + st->bitmap.vertices[i][0][3] = 1.0f; st->bitmap.vertices[i][1][0] = color[0]; st->bitmap.vertices[i][1][1] = color[1]; st->bitmap.vertices[i][1][2] = color[2]; @@ -513,7 +513,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_set_vertex_elements(cso, 3, st->velems_util_draw); /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ - z = z * 2.0 - 1.0; + z = z * 2.0f - 1.0f; /* draw textured quad */ offset = setup_bitmap_vertex_data(st, From b7e89115310628310bf458a33f2df2bf23384cf3 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 2 Aug 2011 11:36:44 -0500 Subject: [PATCH 219/600] glsl_to_tgsi: remove debugging printf --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ba4074eecd5..b5f4253ea64 
100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -840,7 +840,6 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file, int index = 0; immediate_storage *entry; assert(file == PROGRAM_IMMEDIATE); - fprintf(stderr, "adding immediate\n"); /* Search immediate storage to see if we already have an identical * immediate that we can use instead of adding a duplicate entry. From 9adcab9cd464d659288e31e6767efb5dee3894ff Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 4 Aug 2011 10:15:54 -0500 Subject: [PATCH 220/600] st/mesa: replace duplicated create_color_map_texture() function with shared function --- .../state_tracker/st_atom_pixeltransfer.c | 22 +----------------- src/mesa/state_tracker/st_cb_drawpixels.c | 23 +------------------ src/mesa/state_tracker/st_texture.c | 20 ++++++++++++++++ src/mesa/state_tracker/st_texture.h | 4 ++++ 4 files changed, 26 insertions(+), 43 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 95b706cb96c..12b5bc5ba79 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -84,26 +84,6 @@ make_state_key(struct gl_context *ctx, struct state_key *key) } -static struct pipe_resource * -create_color_map_texture(struct gl_context *ctx) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_resource *pt; - enum pipe_format format; - const uint texSize = 256; /* simple, and usually perfect */ - - /* find an RGBA texture format */ - format = st_choose_format(pipe->screen, GL_RGBA, - PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); - - /* create texture for color map/table */ - pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, - texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); - return pt; -} - - /** * Update the pixelmap texture with the contents of the R/G/B/A pixel maps. 
*/ @@ -219,7 +199,7 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key) /* create the colormap/texture now if not already done */ if (!st->pixel_xfer.pixelmap_texture) { - st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); st->pixel_xfer.pixelmap_sampler_view = st_create_texture_sampler_view(st->pipe, st->pixel_xfer.pixelmap_texture); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index f4dd2a42847..0c4dc23ccf7 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -94,27 +94,6 @@ is_passthrough_program(const struct gl_fragment_program *prog) } -/* XXX copied verbatim from st_atom_pixeltransfer.c */ -static struct pipe_resource * -create_color_map_texture(struct gl_context *ctx) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_resource *pt; - enum pipe_format format; - const uint texSize = 256; /* simple, and usually perfect */ - - /* find an RGBA texture format */ - format = st_choose_format(pipe->screen, GL_RGBA, - PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); - - /* create texture for color map/table */ - pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, - texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); - return pt; -} - - /** * Returns a fragment program which implements the current pixel transfer ops. 
*/ @@ -142,7 +121,7 @@ get_glsl_pixel_transfer_program(struct st_context *st, if (pixelMaps) { /* create the colormap/texture now if not already done */ if (!st->pixel_xfer.pixelmap_texture) { - st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); st->pixel_xfer.pixelmap_sampler_view = st_create_texture_sampler_view(st->pipe, st->pixel_xfer.pixelmap_texture); diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index ffe7e256a56..d8ba3ac9252 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -396,3 +396,23 @@ st_texture_image_copy(struct pipe_context *pipe, } } + +struct pipe_resource * +st_create_color_map_texture(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_resource *pt; + enum pipe_format format; + const uint texSize = 256; /* simple, and usually perfect */ + + /* find an RGBA texture format */ + format = st_choose_format(pipe->screen, GL_RGBA, + PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); + + /* create texture for color map/table */ + pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, + texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); + return pt; +} + diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index d50c3c9af79..b822f47cf9e 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -232,4 +232,8 @@ st_texture_image_copy(struct pipe_context *pipe, struct pipe_resource *src, GLuint srcLevel, GLuint face); + +extern struct pipe_resource * +st_create_color_map_texture(struct gl_context *ctx); + #endif From d6a0692f9dc055c5e5f0e7c806537ad24aa13709 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 13:07:50 -0600 Subject: [PATCH 221/600] mesa: don't use K&R style function pointer calls --- src/mesa/main/texobj.c | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index 3021716a0b6..078a43ab153 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -842,7 +842,7 @@ _mesa_GenTextures( GLsizei n, GLuint *textures ) struct gl_texture_object *texObj; GLuint name = first + i; GLenum target = 0; - texObj = (*ctx->Driver.NewTextureObject)( ctx, name, target); + texObj = ctx->Driver.NewTextureObject(ctx, name, target); if (!texObj) { _glthread_UNLOCK_MUTEX(ctx->Shared->Mutex); _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTextures"); @@ -1066,7 +1066,7 @@ _mesa_BindTexture( GLenum target, GLuint texName ) } else { /* if this is a new texture id, allocate a texture object now */ - newTexObj = (*ctx->Driver.NewTextureObject)(ctx, texName, target); + newTexObj = ctx->Driver.NewTextureObject(ctx, texName, target); if (!newTexObj) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindTexture"); return; @@ -1108,7 +1108,7 @@ _mesa_BindTexture( GLenum target, GLuint texName ) /* Pass BindTexture call to device driver */ if (ctx->Driver.BindTexture) - (*ctx->Driver.BindTexture)( ctx, target, newTexObj ); + ctx->Driver.BindTexture(ctx, target, newTexObj); } From 1c8d079e205919b24e04efdc2421c18d03f078ff Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 15:32:09 -0600 Subject: [PATCH 222/600] mesa: fix out of bounds array access in rtgc debug code Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39841 This would only be hit if someone set RGTC_DEBUG=1. 
--- src/mesa/main/texcompress_rgtc_tmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/texcompress_rgtc_tmp.h b/src/mesa/main/texcompress_rgtc_tmp.h index c8bf082a158..48bbd374e08 100644 --- a/src/mesa/main/texcompress_rgtc_tmp.h +++ b/src/mesa/main/texcompress_rgtc_tmp.h @@ -181,7 +181,7 @@ static void TAG(encode_rgtc_chan)(TYPE *blkaddr, TYPE srccolors[4][4], fprintf(stderr, "%d ", alphaenc1[i]); } fprintf(stderr, "cutVals "); - for (i = 0; i < 8; i++) { + for (i = 0; i < 7; i++) { fprintf(stderr, "%d ", acutValues[i]); } fprintf(stderr, "srcVals "); From d7f2e38fca38a5521e930242be46be5a70a9cbd3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 15:55:13 -0600 Subject: [PATCH 223/600] mesa: add st_glsl_to_tgsi.cpp to Sconscript --- src/mesa/SConscript | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 24e2155c387..cbd16625186 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -264,6 +264,7 @@ statetracker_sources = [ 'state_tracker/st_draw_feedback.c', 'state_tracker/st_extensions.c', 'state_tracker/st_format.c', + 'state_tracker/st_glsl_to_tgsi.cpp', 'state_tracker/st_gen_mipmap.c', 'state_tracker/st_manager.c', 'state_tracker/st_mesa_to_tgsi.c', From a0eb83401ef599e597b72e70c8856e1bc0f59dcc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 15:55:50 -0600 Subject: [PATCH 224/600] mesa: use gl_constant_value type in _mesa_[Get]ProgramNamedParameter4fNV() --- src/mesa/main/nvprogram.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/mesa/main/nvprogram.c b/src/mesa/main/nvprogram.c index dd198b8141a..7ff7645b7b7 100644 --- a/src/mesa/main/nvprogram.c +++ b/src/mesa/main/nvprogram.c @@ -812,7 +812,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name, { struct gl_program *prog; struct gl_fragment_program *fragProg; - GLfloat *v; + gl_constant_value *v; GET_CURRENT_CONTEXT(ctx); 
ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -834,10 +834,10 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name, v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len, (char *) name); if (v) { - v[0] = x; - v[1] = y; - v[2] = z; - v[3] = w; + v[0].f = x; + v[1].f = y; + v[2].f = z; + v[3].f = w; return; } @@ -878,7 +878,7 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name, { struct gl_program *prog; struct gl_fragment_program *fragProg; - const GLfloat *v; + const gl_constant_value *v; GET_CURRENT_CONTEXT(ctx); @@ -899,10 +899,10 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name, v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len, (char *) name); if (v) { - params[0] = v[0]; - params[1] = v[1]; - params[2] = v[2]; - params[3] = v[3]; + params[0] = v[0].f; + params[1] = v[1].f; + params[2] = v[2].f; + params[3] = v[3].f; return; } From 324857599b2a4735c86e54da9a1776c034dadf72 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 16:00:06 -0600 Subject: [PATCH 225/600] mesa: use gl_constant_value type in ARB program parser --- src/mesa/program/program_parse.y | 56 +++++++++++++++---------------- src/mesa/program/program_parser.h | 3 +- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index dbf5abaa617..dec35038be5 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -1854,64 +1854,64 @@ paramConstUse: paramConstScalarUse | paramConstVector; paramConstScalarDecl: signedFloatConstant { $$.count = 4; - $$.data[0] = $1; - $$.data[1] = $1; - $$.data[2] = $1; - $$.data[3] = $1; + $$.data[0].f = $1; + $$.data[1].f = $1; + $$.data[2].f = $1; + $$.data[3].f = $1; } ; paramConstScalarUse: REAL { $$.count = 1; - $$.data[0] = $1; - $$.data[1] = $1; - $$.data[2] = $1; - $$.data[3] = $1; + $$.data[0].f = $1; + $$.data[1].f = $1; + $$.data[2].f = $1; + 
$$.data[3].f = $1; } | INTEGER { $$.count = 1; - $$.data[0] = (float) $1; - $$.data[1] = (float) $1; - $$.data[2] = (float) $1; - $$.data[3] = (float) $1; + $$.data[0].f = (float) $1; + $$.data[1].f = (float) $1; + $$.data[2].f = (float) $1; + $$.data[3].f = (float) $1; } ; paramConstVector: '{' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = 0.0f; - $$.data[2] = 0.0f; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = 0.0f; + $$.data[2].f = 0.0f; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = 0.0f; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = 0.0f; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = $6; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = $6; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = $6; - $$.data[3] = $8; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = $6; + $$.data[3].f = $8; } ; diff --git a/src/mesa/program/program_parser.h b/src/mesa/program/program_parser.h index 8e5aaee95e5..5637598f3b3 100644 --- a/src/mesa/program/program_parser.h +++ b/src/mesa/program/program_parser.h @@ -23,6 +23,7 @@ #pragma once #include "main/config.h" +#include "program/prog_parameter.h" struct gl_context; @@ -96,7 +97,7 @@ struct asm_symbol { struct asm_vector { unsigned count; - float data[4]; + gl_constant_value data[4]; }; From bf8d06c518a8e17e485b18ba03be3e1b45cc7327 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 16:01:27 -0600 Subject: [PATCH 226/600] mesa: pass correct constant type to _mesa_fetch_state() Fixes assorted warnings about float vs. 
gl_constant_value pointers. --- src/mesa/program/prog_statevars.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 16f9690e865..6aa2409e85e 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -1111,7 +1111,7 @@ _mesa_load_state_parameters(struct gl_context *ctx, if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { _mesa_fetch_state(ctx, paramList->Parameters[i].StateIndexes, - paramList->ParameterValues[i]); + &paramList->ParameterValues[i][0].f); } } } From a48118e510fcbb57634a7869cb628123fa8c3f2e Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Thu, 4 Aug 2011 18:04:44 -0700 Subject: [PATCH 227/600] mesa: Remove MSVC stdint typedefs from compiler.h. MSVC can now include the stdint.h at include/c99/stdint.h. --- src/mesa/main/compiler.h | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index d736fdfc58a..ee7d0b2f880 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -60,29 +60,7 @@ extern "C" { /** * Get standard integer types */ -#if defined(_MSC_VER) - typedef __int8 int8_t; - typedef unsigned __int8 uint8_t; - typedef __int16 int16_t; - typedef unsigned __int16 uint16_t; - typedef __int32 int32_t; - typedef unsigned __int32 uint32_t; - typedef __int64 int64_t; - typedef unsigned __int64 uint64_t; - -# if defined(_WIN64) - typedef __int64 intptr_t; - typedef unsigned __int64 uintptr_t; -# else - typedef __int32 intptr_t; - typedef unsigned __int32 uintptr_t; -# endif - -# define INT64_C(__val) __val##i64 -# define UINT64_C(__val) __val##ui64 -#else -# include <stdint.h> -#endif +#include <stdint.h> /** From 547212d963c70161915c46d64e8020617199fb8d Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 4 Aug 2011 00:39:07 +0900 Subject: [PATCH 228/600] glsl: empty declarations should be valid Unlike C++, empty declarations such as float; should be
valid. The spec is not explicit about this actually. Some apps that generate their shader sources may rely on this. This was noted when porting one of them to Linux from Windows. Reviewed-by: Chad Versace Note: this is a candidate for the 7.11 branch. --- src/glsl/ast_to_hir.cpp | 10 +++++----- src/glsl/glsl_parser.yy | 10 +++------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index c0524bf0bcc..7da14611950 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2399,12 +2399,12 @@ ast_declarator_list::hir(exec_list *instructions, decl_type = this->type->specifier->glsl_type(& type_name, state); if (this->declarations.is_empty()) { - /* The only valid case where the declaration list can be empty is when - * the declaration is setting the default precision of a built-in type - * (e.g., 'precision highp vec4;'). - */ - if (decl_type != NULL) { + /* Warn if this empty declaration is not for declaring a structure. + */ + if (this->type->specifier->structure == NULL) { + _mesa_glsl_warning(&loc, state, "empty declaration"); + } } else { _mesa_glsl_error(& loc, state, "incomplete declaration"); } diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 2c0498ece7a..1851f1e202e 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -971,13 +971,9 @@ single_declaration: fully_specified_type { void *ctx = state; - if ($1->specifier->type_specifier != ast_struct) { - _mesa_glsl_error(& @1, state, "empty declaration list\n"); - YYERROR; - } else { - $$ = new(ctx) ast_declarator_list($1); - $$->set_location(yylloc); - } + /* Empty declaration list is valid. 
*/ + $$ = new(ctx) ast_declarator_list($1); + $$->set_location(yylloc); } | fully_specified_type any_identifier { From c251d83d916336f95109363e919920a024947230 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 07:38:13 +0200 Subject: [PATCH 229/600] vbo: do not call _mesa_max_buffer_index in debug builds That code drops performance in Unigine Heaven and Tropics by a factor of 10. That's too crazy even for a debug build. NOTE: This is a candidate for the 7.11 branch. Reviewed-by: Brian Paul --- src/mesa/vbo/vbo_exec_array.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index b908d5aea7e..32ce0e4a8ff 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -909,11 +909,10 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, if (0) _mesa_print_arrays(ctx); -#ifdef DEBUG /* 'end' was out of bounds, but now let's check the actual array * indexes to see if any of them are out of bounds. */ - { + if (0) { GLuint max = _mesa_max_buffer_index(ctx, count, type, indices, ctx->Array.ElementArrayBufferObj); if (max >= ctx->Array.ArrayObj->_MaxElement) { @@ -934,7 +933,6 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, * upper bound wrong. 
*/ } -#endif /* Set 'end' to the max possible legal value */ assert(ctx->Array.ArrayObj->_MaxElement >= 1); From 6b3bbf52b884ef4b5f0049623ec7154dd3c1dc31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 5 Aug 2011 06:03:18 +0200 Subject: [PATCH 230/600] r300g: adapt to the resource_resolve interface change --- src/gallium/drivers/r300/r300_render.c | 43 +++++++++++++------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b31141a518e..d69b4cf4275 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -1267,33 +1267,31 @@ done: r300->sprite_coord_enable = last_sprite_coord_enable; } -static void r300_resource_resolve(struct pipe_context* pipe, - struct pipe_resource* dest, - unsigned dst_layer, - struct pipe_resource* src, - unsigned src_layer) +static void r300_resource_resolve(struct pipe_context *pipe, + const struct pipe_resolve_info *info) { - struct r300_context* r300 = r300_context(pipe); - struct pipe_surface* srcsurf, surf_tmpl; + struct r300_context *r300 = r300_context(pipe); + struct pipe_surface *srcsurf, *dstsurf, surf_tmpl; struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; float color[] = {0, 0, 0, 0}; memset(&surf_tmpl, 0, sizeof(surf_tmpl)); - surf_tmpl.format = src->format; - surf_tmpl.usage = 0; /* not really a surface hence no bind flags */ - surf_tmpl.u.tex.level = 0; /* msaa resources cannot have mipmaps */ - surf_tmpl.u.tex.first_layer = src_layer; - surf_tmpl.u.tex.last_layer = src_layer; - srcsurf = pipe->create_surface(pipe, src, &surf_tmpl); - surf_tmpl.format = dest->format; - surf_tmpl.u.tex.first_layer = dst_layer; - surf_tmpl.u.tex.last_layer = dst_layer; + surf_tmpl.format = info->src.res->format; + surf_tmpl.u.tex.first_layer = + surf_tmpl.u.tex.last_layer = info->src.layer; + srcsurf = pipe->create_surface(pipe, info->src.res, 
&surf_tmpl); + /* XXX Offset both surfaces by x0,y1. */ + + surf_tmpl.format = info->dst.res->format; + surf_tmpl.u.tex.level = info->dst.level; + surf_tmpl.u.tex.first_layer = + surf_tmpl.u.tex.last_layer = info->dst.layer; + dstsurf = pipe->create_surface(pipe, info->dst.res, &surf_tmpl); DBG(r300, DBG_DRAW, "r300: Resolving resource...\n"); /* Enable AA resolve. */ - aa->dest = r300_surface(pipe->create_surface(pipe, dest, &surf_tmpl)); - + aa->dest = r300_surface(dstsurf); aa->aaresolve_ctl = R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; @@ -1301,16 +1299,19 @@ static void r300_resource_resolve(struct pipe_context* pipe, r300_mark_atom_dirty(r300, &r300->aa_state); /* Resolve the surface. */ + /* XXX: y1 < 0 ==> Y flip */ r300->context.clear_render_target(pipe, - srcsurf, color, 0, 0, src->width0, src->height0); + srcsurf, color, 0, 0, + info->dst.x1 - info->dst.x0, + info->dst.y1 - info->dst.y0); /* Disable AA resolve. */ aa->aaresolve_ctl = 0; r300->aa_state.size = 4; r300_mark_atom_dirty(r300, &r300->aa_state); - pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL); - pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL); + pipe_surface_reference(&srcsurf, NULL); + pipe_surface_reference(&dstsurf, NULL); } void r300_init_render_functions(struct r300_context *r300) From d99c8e191b9dd206eae42ffab1ade01054026ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 5 Aug 2011 06:04:05 +0200 Subject: [PATCH 231/600] r300g: handle new CAPs --- src/gallium/drivers/r300/r300_screen.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 47de4005c37..674bd24953c 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -142,6 +142,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case 
PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_SCALED_RESOLVE: return 0; /* SWTCL-only features. */ @@ -211,13 +212,12 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_PREDS: return is_r500 ? 1 : 0; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 0; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 0; case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_INTEGERS: return 0; } break; @@ -248,20 +248,15 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e return 1; /* XXX guessed */ case PIPE_SHADER_CAP_MAX_PREDS: return is_r500 ? 4 : 0; /* XXX guessed. */ + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 0; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: - return 0; - case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; case PIPE_SHADER_CAP_SUBROUTINES: - return 0; case PIPE_SHADER_CAP_INTEGERS: return 0; - default: - break; } break; default: From 6e7942936c5de59f509779b6f7620d80d2fbc21a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 5 Aug 2011 06:57:07 +0200 Subject: [PATCH 232/600] st/mesa: remove unused-but-set variables in st_glsl_to_tgsi.cpp --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 9c6a7ed738a..460bafb3821 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1725,7 +1725,6 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) if (index) { src.index += index->value.i[0] * 
element_size; } else { - st_src_reg array_base = this->result; /* Variable index array dereference. It eats the "vec4" of the * base of the array and an index that offsets the TGSI register * index. @@ -2463,7 +2462,7 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) void glsl_to_tgsi_visitor::visit(ir_if *ir) { - glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL; + glsl_to_tgsi_instruction *cond_inst, *if_inst; glsl_to_tgsi_instruction *prev_inst; prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); @@ -2495,7 +2494,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) visit_exec_list(&ir->then_instructions, this); if (!ir->else_instructions.is_empty()) { - else_inst = emit(ir->condition, TGSI_OPCODE_ELSE); + emit(ir->condition, TGSI_OPCODE_ELSE); visit_exec_list(&ir->else_instructions, this); } From a3cde50effbc469379bf5d4d69d03464de43fb29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 5 Aug 2011 07:02:25 +0200 Subject: [PATCH 233/600] st/dri: remove a dummy function dri2_create_context It does nothing besides calling dri_create_context with the same parameters. 
--- src/gallium/state_trackers/dri/drm/dri2.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index fe4ddb312be..5344775f96f 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -661,20 +661,6 @@ fail: return NULL; } -static boolean -dri2_create_context(gl_api api, const struct gl_config * visual, - __DRIcontext * cPriv, void *sharedContextPrivate) -{ - struct dri_context *ctx = NULL; - - if (!dri_create_context(api, visual, cPriv, sharedContextPrivate)) - return FALSE; - - ctx = cPriv->driverPrivate; - - return TRUE; -} - static boolean dri2_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, @@ -702,7 +688,7 @@ const struct __DriverAPIRec driDriverAPI = { .InitScreen = NULL, .InitScreen2 = dri2_init_screen, .DestroyScreen = dri_destroy_screen, - .CreateContext = dri2_create_context, + .CreateContext = dri_create_context, .DestroyContext = dri_destroy_context, .CreateBuffer = dri2_create_buffer, .DestroyBuffer = dri_destroy_buffer, From 115651241b7f04e7ec274c267e7de5d1c8fb8c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 5 Aug 2011 07:07:46 +0200 Subject: [PATCH 234/600] st/dri: remove an unused-but-set variable --- src/gallium/state_trackers/dri/drm/dri2.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 5344775f96f..d491e46ab16 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -266,7 +266,6 @@ dri2_allocate_buffer(__DRIscreen *sPriv, struct dri_screen *screen = dri_screen(sPriv); struct dri2_buffer *buffer; struct pipe_resource templ; - enum st_attachment_type statt; enum pipe_format pf; unsigned bind = 0; struct winsys_handle whandle; @@ -274,22 +273,16 @@ dri2_allocate_buffer(__DRIscreen *sPriv, 
switch (attachment) { case __DRI_BUFFER_FRONT_LEFT: case __DRI_BUFFER_FAKE_FRONT_LEFT: - statt = ST_ATTACHMENT_FRONT_LEFT; bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; break; case __DRI_BUFFER_BACK_LEFT: - statt = ST_ATTACHMENT_BACK_LEFT; bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; break; case __DRI_BUFFER_DEPTH: case __DRI_BUFFER_DEPTH_STENCIL: case __DRI_BUFFER_STENCIL: - statt = ST_ATTACHMENT_DEPTH_STENCIL; bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */ break; - default: - statt = ST_ATTACHMENT_INVALID; - break; } switch (format) { From 64ab39b035f755510a644643b96451431bbe5f27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 20:57:48 +0200 Subject: [PATCH 235/600] winsys/radeon: fix space checking We should remove the relocations which caused a validation failure from the list, so that the kernel receives only the validated ones. NOTE: This is a candidate for the 7.11 branch. --- src/gallium/drivers/r300/r300_emit.c | 3 +- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 33 +++++++++++++++++-- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 1 + src/gallium/winsys/radeon/drm/radeon_winsys.h | 4 ++- 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 502aed3a20c..b953bd10f43 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1238,13 +1238,12 @@ validate: r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, r300_resource(index_buffer)->domain, 0); - /* Now do the validation. */ + /* Now do the validation (flush is called inside cs_validate on failure). */ if (!r300->rws->cs_validate(r300->cs)) { /* Ooops, an infinite loop, give up. 
*/ if (flushed) return FALSE; - r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL); flushed = TRUE; goto validate; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index f0f4a70be3f..fec660d4cc8 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -115,6 +115,7 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc) } csc->crelocs = 0; + csc->validated_crelocs = 0; csc->chunks[0].length_dw = 0; csc->chunks[1].length_dw = 0; csc->used_gart = 0; @@ -307,9 +308,37 @@ static void radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + boolean status = + cs->csc->used_gart < cs->ws->info.gart_size * 0.8 && + cs->csc->used_vram < cs->ws->info.vram_size * 0.8; - return cs->csc->used_gart < cs->ws->info.gart_size * 0.8 && - cs->csc->used_vram < cs->ws->info.vram_size * 0.8; + if (status) { + cs->csc->validated_crelocs = cs->csc->crelocs; + } else { + /* Remove lately-added relocations. The validation failed with them + * and the CS is about to be flushed because of that. Keep only + * the already-validated relocations. */ + unsigned i; + + for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) { + p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references); + radeon_bo_reference(&cs->csc->relocs_bo[i], NULL); + } + cs->csc->crelocs = cs->csc->validated_crelocs; + + /* Flush if there are any relocs. Clean up otherwise. 
*/ + if (cs->csc->crelocs) { + cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); + } else { + radeon_cs_context_cleanup(cs->csc); + + assert(cs->base.cdw == 0); + if (cs->base.cdw != 0) { + fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__); + } + } + } + return status; } static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index ea2a820b30a..fe285326884 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -41,6 +41,7 @@ struct radeon_cs_context { /* Relocs. */ unsigned nrelocs; unsigned crelocs; + unsigned validated_crelocs; struct radeon_bo **relocs_bo; struct drm_radeon_cs_reloc *relocs; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index f8a4d3abd43..6d52dc25022 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -262,7 +262,9 @@ struct radeon_winsys { /** * Return TRUE if there is enough memory in VRAM and GTT for the relocs - * added so far. + * added so far. If the validation fails, all the relocations which have + * been added since the last call of cs_validate will be removed and + * the CS will be flushed (provided there are still any relocations). * * \param cs A command stream to validate. 
*/ From 5b005ecc2b624a0ffb577ab760abacf069694f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 21:01:31 +0200 Subject: [PATCH 236/600] winsys/radeon: do the CS cleanup in the CS ioctl thread --- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index fec660d4cc8..1b30b95a318 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -380,6 +380,8 @@ static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param) for (i = 0; i < csc->crelocs; i++) p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls); + + radeon_cs_context_cleanup(csc); return NULL; } @@ -424,6 +426,8 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) } else { radeon_drm_cs_emit_ioctl(cs->csc); } + } else { + radeon_cs_context_cleanup(cs->csc); } /* Flip command streams. */ @@ -432,8 +436,6 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) cs->cst = tmp; /* Prepare a new CS. */ - radeon_cs_context_cleanup(cs->csc); - cs->base.buf = cs->csc->buf; cs->base.cdw = 0; } From 0722edc59cd526437c2d4bad474b934dad84d789 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 28 Jul 2011 09:57:19 -0700 Subject: [PATCH 237/600] i965/fs: Don't allocate the old backend's compile structs for our compile. This saves some 35MB when the program only uses GLSL shaders. 
--- src/mesa/drivers/dri/i965/brw_wm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index b0dfdd536aa..d13ac6124c8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -206,10 +206,6 @@ bool do_wm_prog(struct brw_context *brw, */ return false; } - c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); - c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); - c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); - c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); } else { void *instruction = c->instruction; void *prog_instructions = c->prog_instructions; @@ -232,6 +228,13 @@ bool do_wm_prog(struct brw_context *brw, if (!brw_wm_fs_emit(brw, c, prog)) return false; } else { + if (!c->instruction) { + c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); + c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); + c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); + c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); + } + /* Fallback for fixed function and ARB_fp shaders. */ c->dispatch_width = 16; brw_wm_payload_setup(brw, c); From ee0373b833155804bb8846c6f05f897b9ee5afa6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 18:13:04 -0700 Subject: [PATCH 238/600] i965/fs: Don't upload unused uniform components. This saves both register space and upload bandwidth for unused values. Note that previously we were relying on the visitor not initially generating references to different sets of uniforms between the 8-wide and 16-wide code generation, and now we're relying on them dead-code eliminating the same stuff, too. 
--- src/mesa/drivers/dri/i965/brw_fs.cpp | 89 +++++++++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_fs.h | 10 +++- 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 02041b3bc03..f55be022f72 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -242,11 +242,12 @@ import_uniforms_callback(const void *key, * This brings in those uniform definitions */ void -fs_visitor::import_uniforms(struct hash_table *src_variable_ht) +fs_visitor::import_uniforms(fs_visitor *v) { - hash_table_call_foreach(src_variable_ht, + hash_table_call_foreach(v->variable_ht, import_uniforms_callback, variable_ht); + this->params_remap = v->params_remap; } /* Our support for uniforms is piggy-backed on the struct @@ -798,6 +799,86 @@ fs_visitor::split_virtual_grfs() this->live_intervals_valid = false; } +bool +fs_visitor::remove_dead_constants() +{ + if (c->dispatch_width == 8) { + this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params); + + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) + this->params_remap[i] = -1; + + /* Find which params are still in use. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(constant_nr < (int)c->prog_data.nr_params); + + /* For now, set this to non-negative. We'll give it the + * actual new number in a moment, in order to keep the + * register numbers nicely ordered. + */ + this->params_remap[constant_nr] = 0; + } + } + + /* Figure out what the new numbers for the params will be. At some + * point when we're doing uniform array access, we're going to want + * to keep the distinction between .reg and .reg_offset, but for + * now we don't care. 
+ */ + unsigned int new_nr_params = 0; + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + if (this->params_remap[i] != -1) { + this->params_remap[i] = new_nr_params++; + } + } + + /* Update the list of params to be uploaded to match our new numbering. */ + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + int remapped = this->params_remap[i]; + + if (remapped == -1) + continue; + + /* We've already done setup_paramvalues_refs() so no need to worry + * about param_index and param_offset. + */ + c->prog_data.param[remapped] = c->prog_data.param[i]; + c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i]; + } + + c->prog_data.nr_params = new_nr_params; + } else { + /* This should have been generated in the 8-wide pass already. */ + assert(this->params_remap); + } + + /* Now do the renumbering of the shader to remove unused params. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(this->params_remap[constant_nr] != -1); + inst->src[i].hw_reg = this->params_remap[constant_nr]; + inst->src[i].reg_offset = 0; + } + } + + return true; +} + /** * Choose accesses from the UNIFORM file to demote to using the pull * constant buffer. 
@@ -1624,6 +1705,8 @@ fs_visitor::run() progress = dead_code_eliminate() || progress; } while (progress); + remove_dead_constants(); + schedule_instructions(); assign_curb_setup(); @@ -1702,7 +1785,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) { c->dispatch_width = 16; fs_visitor v2(c, prog, shader); - v2.import_uniforms(v.variable_ht); + v2.import_uniforms(&v); v2.run(); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 89d6cda7e4f..96e1420038f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -421,7 +421,7 @@ public: fs_reg *variable_storage(ir_variable *var); int virtual_grf_alloc(int size); - void import_uniforms(struct hash_table *src_variable_ht); + void import_uniforms(fs_visitor *v); void visit(ir_variable *ir); void visit(ir_assignment *ir); @@ -489,6 +489,7 @@ public: bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); + bool remove_dead_constants(); bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); void schedule_instructions(); @@ -566,6 +567,13 @@ public: int *virtual_grf_use; bool live_intervals_valid; + /* This is the map from UNIFORM hw_reg + reg_offset as generated by + * the visitor to the packed uniform number after + * remove_dead_constants() that represents the actual uploaded + * uniform index. + */ + int *params_remap; + struct hash_table *variable_ht; ir_variable *frag_color, *frag_data, *frag_depth; int first_non_payload_grf; From 69dc529da241747888efefdf0d3e58479dd6248c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 28 Jul 2011 09:52:03 -0700 Subject: [PATCH 239/600] mesa: Remove dead "MemPool" field of gl_shader_state. 
--- src/mesa/main/mtypes.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index b88118366b2..2d5f44c1e7b 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2252,8 +2252,6 @@ struct gl_shader_state */ struct gl_shader_program *ActiveProgram; - void *MemPool; - GLbitfield Flags; /**< Mask of GLSL_x flags */ }; From 9998df36c271810ecf20041bf6bed28f3952a94f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 18:15:25 -0700 Subject: [PATCH 240/600] i965: Add dumping for gen6 WM constants too. This looks just like the VS dump for now. --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_state_dump.c | 20 ++++++++++++++++++++ src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 471015cf9d0..22baf978ad4 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -212,6 +212,7 @@ enum state_struct_type { AUB_TRACE_BINDING_TABLE = 0x101, AUB_TRACE_SURFACE_STATE = 0x102, AUB_TRACE_VS_CONSTANTS = 0x103, + AUB_TRACE_WM_CONSTANTS = 0x104, }; /** Subclass of Mesa vertex program */ diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index b9e5cc1a534..cb7a3ef73d3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -455,6 +455,23 @@ dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) } } +static void +dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "WM_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; + + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % 
f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} + static void dump_binding_table(struct brw_context *brw, uint32_t offset, uint32_t size) { @@ -602,6 +619,9 @@ dump_state_batch(struct brw_context *brw) case AUB_TRACE_VS_CONSTANTS: dump_vs_constants(brw, offset, size); break; + case AUB_TRACE_WM_CONSTANTS: + dump_wm_constants(brw, offset, size); + break; default: break; } diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 185da9c355f..3d525248f25 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -54,7 +54,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, + constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); From 6bd5f43f212962a054a41290b0f8e350dae2f40d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 15:13:08 -0700 Subject: [PATCH 241/600] prog_optimize: Add support for saturates to _mesa_merge_mov_into_inst. This fixes the remaining regression from ff_fragment_shader in Mesa IR instruction count, to now being a 1.9% win overall. 
--- src/mesa/program/prog_optimize.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index f4a7a638d5f..3340ce0498b 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -472,8 +472,7 @@ can_downward_mov_be_modifed(const struct prog_instruction *mov) mov->SrcReg[0].HasIndex2 == 0 && mov->SrcReg[0].RelAddr2 == 0 && mov->DstReg.RelAddr == 0 && - mov->DstReg.CondMask == COND_TR && - mov->SaturateMode == SATURATE_OFF; + mov->DstReg.CondMask == COND_TR; } @@ -482,7 +481,8 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov) { return can_downward_mov_be_modifed(mov) && - mov->DstReg.File == PROGRAM_TEMPORARY; + mov->DstReg.File == PROGRAM_TEMPORARY && + mov->SaturateMode == SATURATE_OFF; } @@ -657,6 +657,8 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst, if (mask != (inst->DstReg.WriteMask & mask)) return GL_FALSE; + inst->SaturateMode |= mov->SaturateMode; + /* Depending on the instruction, we may need to recompute the swizzles. * Also, some other instructions (like TEX) are not linear. We will only * consider completely active sources and destinations From 62722d90af9d43d889af33b080a682f2004e049c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 13:54:15 -0700 Subject: [PATCH 242/600] ir_to_mesa: Try to avoid emitting a MOV_SAT to saturate an expression tree. Fixes a regression in codegen quality for ff_fragment_shader conversion to GLSL -- glean texCombine produces 7.5% fewer Mesa IR instructions. 
--- src/mesa/program/ir_to_mesa.cpp | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index debadb9a398..9b615b68a23 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -915,10 +915,30 @@ ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) sat_src->accept(this); src_reg src = this->result; - this->result = get_temp(ir->type); - ir_to_mesa_instruction *inst; - inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src); - inst->saturate = true; + /* If we generated an expression instruction into a temporary in + * processing the saturate's operand, apply the saturate to that + * instruction. Otherwise, generate a MOV to do the saturate. + * + * Note that we have to be careful to only do this optimization if + * the instruction in question was what generated src->result. For + * example, ir_dereference_array might generate a MUL instruction + * to create the reladdr, and return us a src reg using that + * reladdr. That MUL result is not the value we're trying to + * saturate. 
+ */ + ir_expression *sat_src_expr = sat_src->as_expression(); + ir_to_mesa_instruction *new_inst; + new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); + if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || + sat_src_expr->operation == ir_binop_add || + sat_src_expr->operation == ir_binop_dot)) { + new_inst->saturate = true; + } else { + this->result = get_temp(ir->type); + ir_to_mesa_instruction *inst; + inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src); + inst->saturate = true; + } return true; } From fbc2fcf685d22ec9bc9465e1f731529979497eaa Mon Sep 17 00:00:00 2001 From: Christopher James Halse Rogers Date: Thu, 4 Aug 2011 12:06:13 +1000 Subject: [PATCH 243/600] glx/dri2: Paper over errors in DRI2Connect when indirect DRI2 will throw BadRequest for this when the client is not local, but DRI2 is an implementation detail and not something callers should have to know about. Silently swallow errors in this case, and just propagate the failure through DRI2Connect's return code. Note: This is a candidate for the stable release branches. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=28125 Signed-off-by: Christopher James Halse Rogers --- src/glx/dri2.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/glx/dri2.c b/src/glx/dri2.c index 229840d6919..b1b5013d048 100644 --- a/src/glx/dri2.c +++ b/src/glx/dri2.c @@ -190,6 +190,15 @@ DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code) err->minorCode == X_DRI2DestroyDrawable) return True; + /* If the server is non-local DRI2Connect will raise BadRequest. 
+ * Swallow this so that DRI2Connect can signal this in its return code */ + if (err->majorCode == codes->major_opcode && + err->minorCode == X_DRI2Connect && + err->errorCode == BadRequest) { + *ret_code = False; + return True; + } + return False; } From 4c7e215c7bb09f827df630cbfc80e87869351f18 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 2 May 2011 16:27:46 -0700 Subject: [PATCH 244/600] ir_to_mesa: Replace open-coded swizzle_for_size() --- src/mesa/program/ir_to_mesa.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 9b615b68a23..1ef609fe15d 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -641,8 +641,6 @@ src_reg ir_to_mesa_visitor::get_temp(const glsl_type *type) { src_reg src; - int swizzle[4]; - int i; src.file = PROGRAM_TEMPORARY; src.index = next_temp; @@ -652,12 +650,7 @@ ir_to_mesa_visitor::get_temp(const glsl_type *type) if (type->is_array() || type->is_record()) { src.swizzle = SWIZZLE_NOOP; } else { - for (i = 0; i < type->vector_elements; i++) - swizzle[i] = i; - for (; i < 4; i++) - swizzle[i] = type->vector_elements - 1; - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], - swizzle[2], swizzle[3]); + src.swizzle = swizzle_for_size(type->vector_elements); } src.negate = 0; From b44648c9186d403abaeeeb3190d6759f951a49e4 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 5 Aug 2011 14:09:37 -0500 Subject: [PATCH 245/600] glsl_to_tgsi: try to avoid emitting a MOV_SAT to saturate an expression tree This is a port of commit 62722d9 to glsl_to_tgsi, with minor aesthetic changes (moved the declaration and assignment of new_inst inside the if block). 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 32 ++++++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 460bafb3821..e10243add8a 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1232,12 +1232,32 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) sat_src->accept(this); st_src_reg src = this->result; - this->result = get_temp(ir->type); - st_dst_reg result_dst = st_dst_reg(this->result); - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); - inst->saturate = true; + /* If we generated an expression instruction into a temporary in + * processing the saturate's operand, apply the saturate to that + * instruction. Otherwise, generate a MOV to do the saturate. + * + * Note that we have to be careful to only do this optimization if + * the instruction in question was what generated src->result. For + * example, ir_dereference_array might generate a MUL instruction + * to create the reladdr, and return us a src reg using that + * reladdr. That MUL result is not the value we're trying to + * saturate. 
+ */ + ir_expression *sat_src_expr = sat_src->as_expression(); + if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || + sat_src_expr->operation == ir_binop_add || + sat_src_expr->operation == ir_binop_dot)) { + glsl_to_tgsi_instruction *new_inst; + new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + new_inst->saturate = true; + } else { + this->result = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + glsl_to_tgsi_instruction *inst; + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); + inst->saturate = true; + } return true; } From 5164244df02f33d6ad9e0a286f4b6d6af2dfbc75 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 5 Aug 2011 14:37:33 -0500 Subject: [PATCH 246/600] glsl_to_tgsi: replace open-coded swizzle_for_size() This is a port of commit 4c7e215c7bb to glsl_to_tgsi. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e10243add8a..d7a1ba80e1d 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -949,8 +949,6 @@ st_src_reg glsl_to_tgsi_visitor::get_temp(const glsl_type *type) { st_src_reg src; - int swizzle[4]; - int i; src.type = glsl_version >= 130 ? 
type->base_type : GLSL_TYPE_FLOAT; src.file = PROGRAM_TEMPORARY; @@ -961,12 +959,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) if (type->is_array() || type->is_record()) { src.swizzle = SWIZZLE_NOOP; } else { - for (i = 0; i < type->vector_elements; i++) - swizzle[i] = i; - for (; i < 4; i++) - swizzle[i] = type->vector_elements - 1; - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], - swizzle[2], swizzle[3]); + src.swizzle = swizzle_for_size(type->vector_elements); } src.negate = 0; From a9e97d022cb68266639eb54947517454c8ffe45e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 12:47:25 -0700 Subject: [PATCH 247/600] intel: Fix warnings from gl_constant_parameter changes. --- src/mesa/drivers/dri/i915/i915_fragprog.c | 6 ++---- src/mesa/drivers/dri/i965/brw_wm_fp.c | 10 +++++----- src/mesa/drivers/dri/i965/brw_wm_pass0.c | 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 32050cebf33..d155b85ffca 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -175,10 +175,8 @@ src_vector(struct i915_fragment_program *p, case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - src = - i915_emit_param4fv(p, - program->Base.Parameters->ParameterValues[source-> - Index]); + src = i915_emit_param4fv(p, + &program->Base.Parameters->ParameterValues[source->Index][0].f); break; default: diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 7cd3edad235..d52a9581f5e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -535,15 +535,15 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, GLfloat s3) { struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - GLfloat values[4]; + gl_constant_value values[4]; GLuint idx; GLuint swizzle; 
struct prog_src_register reg; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; + values[0].f = s0; + values[1].f = s1; + values[2].f = s2; + values[3].f = s3; idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); reg = src_reg(PROGRAM_STATE_VAR, idx); diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index f78bdc31866..ccf9dc2bc18 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -205,14 +205,14 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, case PROGRAM_CONSTANT: /* These are invarient: */ - ref = get_const_ref(c, &plist->ParameterValues[idx][component]); + ref = get_const_ref(c, &plist->ParameterValues[idx][component].f); break; case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: /* These may change from run to run: */ - ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); + ref = get_param_ref(c, &plist->ParameterValues[idx][component].f ); break; default: From db726b048e8858af226dbd0f0fda72d0be01394e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 30 Jul 2011 21:26:26 -0700 Subject: [PATCH 248/600] mesa: In validate_program(), initialize errMsg for safety. validate_program relies on validate_shader_program to fill in errMsg; empirically, there exist cases where that doesn't happen. While tracking those down may be worthwhile, initializing the string so we don't try to ralloc_strdup random garbage also seems wise. Fixes issues caught by valgrind while running some test case. NOTE: This is a candidate for stable release branches. 
Reviewed-by: Chad Versace Signed-off-by: Kenneth Graunke --- src/mesa/main/shaderapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 8df25c3f988..74997eaaa77 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1125,7 +1125,7 @@ static void validate_program(struct gl_context *ctx, GLuint program) { struct gl_shader_program *shProg; - char errMsg[100]; + char errMsg[100] = ""; shProg = _mesa_lookup_shader_program_err(ctx, program, "glValidateProgram"); if (!shProg) { From 1554e69e00566bc7255b82f5ea93b1f02f1a5bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 6 Aug 2011 05:15:30 +0200 Subject: [PATCH 249/600] winsys/radeon: disable use of the buffer busy-for-write flag --- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 2eb9d134407..609a9065db8 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -192,6 +192,17 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { cs->flush_cs(cs->flush_data, 0); radeon_bo_wait((struct pb_buffer*)bo); + } else { + /* XXX We could check whether the buffer is busy for write here. */ + radeon_bo_wait((struct pb_buffer*)bo); + } +#if 0 + /* XXX This per-winsys busy-for-write tracking sucks. + * What if some other process wrote something, e.g. using + * DRI2CopyRegion? We wouldn't get the busy_for_write flag + * set, skipping bo_wait. + * We need to move the is-busy-for-write query into the kernel. + */ } else if (bo->busy_for_write) { /* Update the busy_for_write field (done by radeon_bo_is_busy) * and wait if needed. 
*/ @@ -199,6 +210,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, radeon_bo_wait((struct pb_buffer*)bo); } } +#endif } else { /* Mapping for write. */ if (radeon_bo_is_referenced_by_cs(cs, bo)) { From 425b179fafe93ddf4abacbccb67ed6aecbef6a7e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 5 Aug 2011 20:10:04 +0200 Subject: [PATCH 250/600] st/mesa: don't resolve stencil twice --- src/mesa/state_tracker/st_cb_blit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 626db12431d..750f541b5dd 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -107,8 +107,10 @@ st_BlitFramebuffer_resolve(struct gl_context *ctx, dstRb = st_renderbuffer(dstDepth->Renderbuffer); info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0; - if (combined && (mask & GL_STENCIL_BUFFER_BIT)) + if (combined && (mask & GL_STENCIL_BUFFER_BIT)) { + mask &= ~GL_STENCIL_BUFFER_BIT; info->mask |= PIPE_MASK_S; + } info->src.res = srcRb->texture; info->src.layer = srcRb->surface->u.tex.first_layer; From 9e466e87e6fde23f8ec0923be86005be81ac2d24 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 25 Jul 2011 18:13:26 +0200 Subject: [PATCH 251/600] nv50,nvc0: never convert in resource copy when format sizes match If there are any cases left where the st thinks that RGBA -> BGRA will swap components, it will get what it deserves. Now the GPU's 2D engine goes unused. What a shame. 
--- src/gallium/drivers/nv50/nv50_surface.c | 9 ++++++++- src/gallium/drivers/nvc0/nvc0_surface.c | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 1a5077e970b..8bca900e1ff 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -198,6 +198,7 @@ nv50_resource_copy_region(struct pipe_context *pipe, { struct nv50_screen *screen = nv50_context(pipe)->screen; int ret; + boolean m2mf; unsigned dst_layer = dstz, src_layer = src_box->z; /* Fallback for buffers. */ @@ -207,9 +208,15 @@ nv50_resource_copy_region(struct pipe_context *pipe, return; } + assert(src->nr_samples == dst->nr_samples); + + m2mf = (src->format == dst->format) || + (util_format_get_blocksizebits(src->format) == + util_format_get_blocksizebits(dst->format)); + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; - if (src->format == dst->format && src->nr_samples == dst->nr_samples) { + if (m2mf) { struct nv50_m2mf_rect drect, srect; unsigned i; unsigned nx = util_format_get_nblocksx(src->format, src_box->width); diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index 67bba3c6cc3..a4fd17e5324 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -205,6 +205,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe, { struct nvc0_screen *screen = nvc0_context(pipe)->screen; int ret; + boolean m2mf; unsigned dst_layer = dstz, src_layer = src_box->z; /* Fallback for buffers. 
*/ @@ -214,9 +215,15 @@ nvc0_resource_copy_region(struct pipe_context *pipe, return; } + assert(src->nr_samples == dst->nr_samples); + + m2mf = (src->format == dst->format) || + (util_format_get_blocksizebits(src->format) == + util_format_get_blocksizebits(dst->format)); + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; - if (src->format == dst->format && src->nr_samples == dst->nr_samples) { + if (m2mf) { struct nv50_m2mf_rect drect, srect; unsigned i; unsigned nx = util_format_get_nblocksx(src->format, src_box->width); From 4dd3272df9f6d483cb3734c3b8c77e9c190b3773 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 7 Aug 2011 15:34:07 +0200 Subject: [PATCH 252/600] d3d1x: adapt to resource_resolve interface change --- .../d3d1x/gd3d11/d3d11_context.h | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h index 12f2aaddc91..aedf82a4381 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h @@ -1726,9 +1726,26 @@ changed: SYNCHRONIZED; GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource; - unsigned dst_layer = d3d11_subresource_to_face(dst->resource, dst_subresource); - unsigned src_layer = d3d11_subresource_to_face(src->resource, src_subresource); - pipe->resource_resolve(pipe, dst->resource, dst_layer, src->resource, src_layer); + struct pipe_resolve_info info; + + info.dst.res = dst->resource; + info.src.res = src->resource; + info.dst.level = 0; + info.dst.layer = d3d11_subresource_to_face(dst->resource, dst_subresource); + info.src.layer = d3d11_subresource_to_face(src->resource, src_subresource); + + info.src.x0 = 0; + info.src.x1 = info.src.res->width0; + info.src.y0 = 0; + info.src.y1 = info.src.res->height0; + info.dst.x0 = 0; + 
info.dst.x1 = info.dst.res->width0; + info.dst.y0 = 0; + info.dst.y1 = info.dst.res->height0; + + info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS; + + pipe->resource_resolve(pipe, &info); } #if API >= 11 From 8488112d20d49d3dc7fefef19c6e550e4b71661c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 5 Aug 2011 15:01:41 -0600 Subject: [PATCH 253/600] mesa: whitespace changes --- src/mesa/program/prog_parameter.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index f858cf0fa0d..1a5ed343937 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -46,16 +46,19 @@ #define PROG_PARAM_BIT_CYL_WRAP 0x10 /**< XXX gallium debug */ /*@}*/ + /** * Actual data for constant values of parameters. */ -typedef union gl_constant_value { - GLfloat f; - GLboolean b; - GLint i; - GLuint u; +typedef union gl_constant_value +{ + GLfloat f; + GLboolean b; + GLint i; + GLuint u; } gl_constant_value; + /** * Program parameter. * Used by shaders/programs for uniforms, constants, varying vars, etc. 
From 7d4d8a8de7c5877108040fa692f2914452b10789 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 8 Aug 2011 09:00:06 -0600 Subject: [PATCH 254/600] gallium: silence warnings about trailing commas in enum lists --- src/gallium/include/pipe/p_defines.h | 2 +- src/gallium/include/pipe/p_video_enums.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 1ef3ae71e76..795de1fbf62 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -495,7 +495,7 @@ enum pipe_shader_cap PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR = 14, PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15, PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */ - PIPE_SHADER_CAP_INTEGERS = 17, + PIPE_SHADER_CAP_INTEGERS = 17 }; diff --git a/src/gallium/include/pipe/p_video_enums.h b/src/gallium/include/pipe/p_video_enums.h index 492ab84e33f..13786067d53 100644 --- a/src/gallium/include/pipe/p_video_enums.h +++ b/src/gallium/include/pipe/p_video_enums.h @@ -50,7 +50,7 @@ enum pipe_video_cap PIPE_VIDEO_CAP_SUPPORTED = 0, PIPE_VIDEO_CAP_NPOT_TEXTURES = 1, PIPE_VIDEO_CAP_MAX_WIDTH = 2, - PIPE_VIDEO_CAP_MAX_HEIGHT = 3, + PIPE_VIDEO_CAP_MAX_HEIGHT = 3 }; enum pipe_video_codec From 75a98740215d82447e5189b36d1dbfa59fcdd5db Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 8 Aug 2011 09:00:57 -0600 Subject: [PATCH 255/600] glsl: silence warning about trailing comma in enum list --- src/glsl/ir_function.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index 2a4de5b0dcd..51d32b46f98 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -27,7 +27,7 @@ typedef enum { PARAMETER_LIST_NO_MATCH, PARAMETER_LIST_EXACT_MATCH, - PARAMETER_LIST_INEXACT_MATCH, /*< Match requires implicit conversion. */ + PARAMETER_LIST_INEXACT_MATCH /*< Match requires implicit conversion. 
*/ } parameter_list_match_t; /** From 506de1954919e5346f382e66a7ec111af7e71a56 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 8 Aug 2011 09:01:13 -0600 Subject: [PATCH 256/600] glext: upgrade to version 72 --- include/GL/glext.h | 367 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 364 insertions(+), 3 deletions(-) diff --git a/include/GL/glext.h b/include/GL/glext.h index 9048515c6d9..09400215bac 100644 --- a/include/GL/glext.h +++ b/include/GL/glext.h @@ -29,9 +29,9 @@ extern "C" { */ /* Header file version number, required by OpenGL ABI for Linux */ -/* glext.h last updated $Date: 2011-07-06 02:49:14 -0700 (Wed, 06 Jul 2011) $ */ +/* glext.h last updated $Date: 2011-08-08 00:34:29 -0700 (Mon, 08 Aug 2011) $ */ /* Current version at http://www.opengl.org/registry/ */ -#define GL_GLEXT_VERSION 71 +#define GL_GLEXT_VERSION 72 /* Function declaration macros - to move into glplatform.h */ #if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) @@ -1047,6 +1047,124 @@ extern "C" { /* reuse GL_UNDEFINED_VERTEX */ #endif +#ifndef GL_VERSION_4_2 +/* Reuse tokens from ARB_base_instance (none) */ +/* Reuse tokens from ARB_shading_language_420pack (none) */ +/* Reuse tokens from ARB_transform_feedback_instanced (none) */ +/* Reuse tokens from ARB_compressed_texture_pixel_storage */ +/* reuse GL_UNPACK_COMPRESSED_BLOCK_WIDTH */ +/* reuse GL_UNPACK_COMPRESSED_BLOCK_HEIGHT */ +/* reuse GL_UNPACK_COMPRESSED_BLOCK_DEPTH */ +/* reuse GL_UNPACK_COMPRESSED_BLOCK_SIZE */ +/* reuse GL_PACK_COMPRESSED_BLOCK_WIDTH */ +/* reuse GL_PACK_COMPRESSED_BLOCK_HEIGHT */ +/* reuse GL_PACK_COMPRESSED_BLOCK_DEPTH */ +/* reuse GL_PACK_COMPRESSED_BLOCK_SIZE */ +/* Reuse tokens from ARB_conservative_depth (none) */ +/* Reuse tokens from ARB_internalformat_query */ +/* reuse GL_NUM_SAMPLE_COUNTS */ +/* Reuse tokens from ARB_map_buffer_alignment */ +/* reuse GL_MIN_MAP_BUFFER_ALIGNMENT */ +/* Reuse tokens from ARB_shader_atomic_counters */ +/* 
reuse GL_ATOMIC_COUNTER_BUFFER */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_BINDING */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_START */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_SIZE */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_DATA_SIZE */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTERS */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTER_INDICES */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_VERTEX_SHADER */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER */ +/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_FRAGMENT_SHADER */ +/* reuse GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS */ +/* reuse GL_MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS */ +/* reuse GL_MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS */ +/* reuse GL_MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS */ +/* reuse GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS */ +/* reuse GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS */ +/* reuse GL_MAX_VERTEX_ATOMIC_COUNTERS */ +/* reuse GL_MAX_TESS_CONTROL_ATOMIC_COUNTERS */ +/* reuse GL_MAX_TESS_EVALUATION_ATOMIC_COUNTERS */ +/* reuse GL_MAX_GEOMETRY_ATOMIC_COUNTERS */ +/* reuse GL_MAX_FRAGMENT_ATOMIC_COUNTERS */ +/* reuse GL_MAX_COMBINED_ATOMIC_COUNTERS */ +/* reuse GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE */ +/* reuse GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS */ +/* reuse GL_ACTIVE_ATOMIC_COUNTER_BUFFERS */ +/* reuse GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX */ +/* reuse GL_UNSIGNED_INT_ATOMIC_COUNTER */ +/* Reuse tokens from ARB_shader_image_load_store */ +/* reuse GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT */ +/* reuse GL_ELEMENT_ARRAY_BARRIER_BIT */ +/* reuse GL_UNIFORM_BARRIER_BIT */ +/* reuse GL_TEXTURE_FETCH_BARRIER_BIT */ +/* reuse GL_SHADER_IMAGE_ACCESS_BARRIER_BIT */ +/* reuse GL_COMMAND_BARRIER_BIT */ +/* reuse GL_PIXEL_BUFFER_BARRIER_BIT */ +/* reuse GL_TEXTURE_UPDATE_BARRIER_BIT */ +/* reuse GL_BUFFER_UPDATE_BARRIER_BIT */ +/* reuse 
GL_FRAMEBUFFER_BARRIER_BIT */ +/* reuse GL_TRANSFORM_FEEDBACK_BARRIER_BIT */ +/* reuse GL_ATOMIC_COUNTER_BARRIER_BIT */ +/* reuse GL_ALL_BARRIER_BITS */ +/* reuse GL_MAX_IMAGE_UNITS */ +/* reuse GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS */ +/* reuse GL_IMAGE_BINDING_NAME */ +/* reuse GL_IMAGE_BINDING_LEVEL */ +/* reuse GL_IMAGE_BINDING_LAYERED */ +/* reuse GL_IMAGE_BINDING_LAYER */ +/* reuse GL_IMAGE_BINDING_ACCESS */ +/* reuse GL_IMAGE_1D */ +/* reuse GL_IMAGE_2D */ +/* reuse GL_IMAGE_3D */ +/* reuse GL_IMAGE_2D_RECT */ +/* reuse GL_IMAGE_CUBE */ +/* reuse GL_IMAGE_BUFFER */ +/* reuse GL_IMAGE_1D_ARRAY */ +/* reuse GL_IMAGE_2D_ARRAY */ +/* reuse GL_IMAGE_CUBE_MAP_ARRAY */ +/* reuse GL_IMAGE_2D_MULTISAMPLE */ +/* reuse GL_IMAGE_2D_MULTISAMPLE_ARRAY */ +/* reuse GL_INT_IMAGE_1D */ +/* reuse GL_INT_IMAGE_2D */ +/* reuse GL_INT_IMAGE_3D */ +/* reuse GL_INT_IMAGE_2D_RECT */ +/* reuse GL_INT_IMAGE_CUBE */ +/* reuse GL_INT_IMAGE_BUFFER */ +/* reuse GL_INT_IMAGE_1D_ARRAY */ +/* reuse GL_INT_IMAGE_2D_ARRAY */ +/* reuse GL_INT_IMAGE_CUBE_MAP_ARRAY */ +/* reuse GL_INT_IMAGE_2D_MULTISAMPLE */ +/* reuse GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY */ +/* reuse GL_UNSIGNED_INT_IMAGE_1D */ +/* reuse GL_UNSIGNED_INT_IMAGE_2D */ +/* reuse GL_UNSIGNED_INT_IMAGE_3D */ +/* reuse GL_UNSIGNED_INT_IMAGE_2D_RECT */ +/* reuse GL_UNSIGNED_INT_IMAGE_CUBE */ +/* reuse GL_UNSIGNED_INT_IMAGE_BUFFER */ +/* reuse GL_UNSIGNED_INT_IMAGE_1D_ARRAY */ +/* reuse GL_UNSIGNED_INT_IMAGE_2D_ARRAY */ +/* reuse GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY */ +/* reuse GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE */ +/* reuse GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY */ +/* reuse GL_MAX_IMAGE_SAMPLES */ +/* reuse GL_IMAGE_BINDING_FORMAT */ +/* reuse GL_IMAGE_FORMAT_COMPATIBILITY_TYPE */ +/* reuse GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE */ +/* reuse GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS */ +/* reuse GL_MAX_VERTEX_IMAGE_UNIFORMS */ +/* reuse GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS */ +/* reuse GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS */ 
+/* reuse GL_MAX_GEOMETRY_IMAGE_UNIFORMS */ +/* reuse GL_MAX_FRAGMENT_IMAGE_UNIFORMS */ +/* reuse GL_MAX_COMBINED_IMAGE_UNIFORMS */ +/* Reuse tokens from ARB_shading_language_packing (none) */ +/* Reuse tokens from ARB_texture_storage */ +/* reuse GL_TEXTURE_IMMUTABLE_FORMAT */ +#endif + #ifndef GL_ARB_multitexture #define GL_TEXTURE0_ARB 0x84C0 #define GL_TEXTURE1_ARB 0x84C1 @@ -2140,6 +2258,143 @@ extern "C" { #ifndef GL_ARB_shader_stencil_export #endif +#ifndef GL_ARB_base_instance +#endif + +#ifndef GL_ARB_shading_language_420pack +#endif + +#ifndef GL_ARB_transform_feedback_instanced +#endif + +#ifndef GL_ARB_compressed_texture_pixel_storage +#define GL_UNPACK_COMPRESSED_BLOCK_WIDTH 0x9127 +#define GL_UNPACK_COMPRESSED_BLOCK_HEIGHT 0x9128 +#define GL_UNPACK_COMPRESSED_BLOCK_DEPTH 0x9129 +#define GL_UNPACK_COMPRESSED_BLOCK_SIZE 0x912A +#define GL_PACK_COMPRESSED_BLOCK_WIDTH 0x912B +#define GL_PACK_COMPRESSED_BLOCK_HEIGHT 0x912C +#define GL_PACK_COMPRESSED_BLOCK_DEPTH 0x912D +#define GL_PACK_COMPRESSED_BLOCK_SIZE 0x912E +#endif + +#ifndef GL_ARB_conservative_depth +#endif + +#ifndef GL_ARB_internalformat_query +#define GL_NUM_SAMPLE_COUNTS 0x9380 +#endif + +#ifndef GL_ARB_map_buffer_alignment +#define GL_MIN_MAP_BUFFER_ALIGNMENT 0x90BC +#endif + +#ifndef GL_ARB_shader_atomic_counters +#define GL_ATOMIC_COUNTER_BUFFER 0x92C0 +#define GL_ATOMIC_COUNTER_BUFFER_BINDING 0x92C1 +#define GL_ATOMIC_COUNTER_BUFFER_START 0x92C2 +#define GL_ATOMIC_COUNTER_BUFFER_SIZE 0x92C3 +#define GL_ATOMIC_COUNTER_BUFFER_DATA_SIZE 0x92C4 +#define GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTERS 0x92C5 +#define GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTER_INDICES 0x92C6 +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_VERTEX_SHADER 0x92C7 +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER 0x92C8 +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER 0x92C9 +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER 0x92CA +#define 
GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_FRAGMENT_SHADER 0x92CB +#define GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS 0x92CC +#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS 0x92CD +#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS 0x92CE +#define GL_MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS 0x92CF +#define GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS 0x92D0 +#define GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS 0x92D1 +#define GL_MAX_VERTEX_ATOMIC_COUNTERS 0x92D2 +#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTERS 0x92D3 +#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTERS 0x92D4 +#define GL_MAX_GEOMETRY_ATOMIC_COUNTERS 0x92D5 +#define GL_MAX_FRAGMENT_ATOMIC_COUNTERS 0x92D6 +#define GL_MAX_COMBINED_ATOMIC_COUNTERS 0x92D7 +#define GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE 0x92D8 +#define GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS 0x92DC +#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9 +#define GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX 0x92DA +#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB +#endif + +#ifndef GL_ARB_shader_image_load_store +#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001 +#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002 +#define GL_UNIFORM_BARRIER_BIT 0x00000004 +#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008 +#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020 +#define GL_COMMAND_BARRIER_BIT 0x00000040 +#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080 +#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100 +#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200 +#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400 +#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800 +#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000 +#define GL_ALL_BARRIER_BITS 0xFFFFFFFF +#define GL_MAX_IMAGE_UNITS 0x8F38 +#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39 +#define GL_IMAGE_BINDING_NAME 0x8F3A +#define GL_IMAGE_BINDING_LEVEL 0x8F3B +#define GL_IMAGE_BINDING_LAYERED 0x8F3C +#define GL_IMAGE_BINDING_LAYER 0x8F3D +#define GL_IMAGE_BINDING_ACCESS 0x8F3E +#define GL_IMAGE_1D 0x904C +#define GL_IMAGE_2D 0x904D +#define 
GL_IMAGE_3D 0x904E +#define GL_IMAGE_2D_RECT 0x904F +#define GL_IMAGE_CUBE 0x9050 +#define GL_IMAGE_BUFFER 0x9051 +#define GL_IMAGE_1D_ARRAY 0x9052 +#define GL_IMAGE_2D_ARRAY 0x9053 +#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054 +#define GL_IMAGE_2D_MULTISAMPLE 0x9055 +#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056 +#define GL_INT_IMAGE_1D 0x9057 +#define GL_INT_IMAGE_2D 0x9058 +#define GL_INT_IMAGE_3D 0x9059 +#define GL_INT_IMAGE_2D_RECT 0x905A +#define GL_INT_IMAGE_CUBE 0x905B +#define GL_INT_IMAGE_BUFFER 0x905C +#define GL_INT_IMAGE_1D_ARRAY 0x905D +#define GL_INT_IMAGE_2D_ARRAY 0x905E +#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F +#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060 +#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061 +#define GL_UNSIGNED_INT_IMAGE_1D 0x9062 +#define GL_UNSIGNED_INT_IMAGE_2D 0x9063 +#define GL_UNSIGNED_INT_IMAGE_3D 0x9064 +#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065 +#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066 +#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067 +#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068 +#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069 +#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A +#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B +#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C +#define GL_MAX_IMAGE_SAMPLES 0x906D +#define GL_IMAGE_BINDING_FORMAT 0x906E +#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7 +#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8 +#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9 +#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA +#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB +#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC +#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD +#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE +#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF +#endif + +#ifndef GL_ARB_shading_language_packing +#endif + +#ifndef GL_ARB_texture_storage +#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F +#endif + #ifndef GL_EXT_abgr #define GL_ABGR_EXT 0x8000 #endif @@ -5917,7 +6172,7 @@ typedef 
void (APIENTRYP PFNGLBLENDFUNCSEPARATEIPROC) (GLuint buf, GLenum srcRGB, #ifndef GL_VERSION_4_1 #define GL_VERSION_4_1 1 -/* OpenGL 4.1 also reuses entry points from these extensions: */ +/* OpenGL 4.1 reuses entry points from these extensions: */ /* ARB_ES2_compatibility */ /* ARB_get_program_binary */ /* ARB_separate_shader_objects */ @@ -5926,6 +6181,22 @@ typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEIPROC) (GLuint buf, GLenum srcRGB, /* ARB_viewport_array */ #endif +#ifndef GL_VERSION_4_2 +#define GL_VERSION_4_2 1 +/* OpenGL 4.2 reuses entry points from these extensions: */ +/* ARB_base_instance */ +/* ARB_shading_language_420pack (no entry points) */ +/* ARB_transform_feedback_instanced */ +/* ARB_compressed_texture_pixel_storage (no entry points) */ +/* ARB_conservative_depth (no entry points) */ +/* ARB_internalformat_query */ +/* ARB_map_buffer_alignment (no entry points) */ +/* ARB_shader_atomic_counters */ +/* ARB_shader_image_load_store */ +/* ARB_shading_language_packing (no entry points) */ +/* ARB_texture_storage */ +#endif + #ifndef GL_ARB_multitexture #define GL_ARB_multitexture 1 #ifdef GL_GLEXT_PROTOTYPES @@ -6851,6 +7122,10 @@ typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERFVPROC) (GLuint sampler, GLenum typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIUIVPROC) (GLuint sampler, GLenum pname, GLuint *params); #endif +#ifndef GL_ARB_shader_bit_encoding +#define GL_ARB_shader_bit_encoding 1 +#endif + #ifndef GL_ARB_texture_rgb10_a2ui #define GL_ARB_texture_rgb10_a2ui 1 #endif @@ -7357,6 +7632,92 @@ typedef void (APIENTRYP PFNGLGETNUNIFORMDVARBPROC) (GLuint program, GLint locati #define GL_ARB_shader_stencil_export 1 #endif +#ifndef GL_ARB_base_instance +#define GL_ARB_base_instance 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glDrawArraysInstancedBaseInstance (GLenum mode, GLint first, GLsizei count, GLsizei primcount, GLuint baseinstance); +GLAPI void APIENTRY glDrawElementsInstancedBaseInstance (GLenum mode, GLsizei count, GLenum type, 
const void *indices, GLsizei primcount, GLuint baseinstance); +GLAPI void APIENTRY glDrawElementsInstancedBaseVertexBaseInstance (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLint basevertex, GLuint baseinstance); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEPROC) (GLenum mode, GLint first, GLsizei count, GLsizei primcount, GLuint baseinstance); +typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEINSTANCEPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLuint baseinstance); +typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLint basevertex, GLuint baseinstance); +#endif + +#ifndef GL_ARB_shading_language_420pack +#define GL_ARB_shading_language_420pack 1 +#endif + +#ifndef GL_ARB_transform_feedback_instanced +#define GL_ARB_transform_feedback_instanced 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glDrawTransformFeedbackInstanced (GLenum mode, GLuint id, GLsizei primcount); +GLAPI void APIENTRY glDrawTransformFeedbackStreamInstanced (GLenum mode, GLuint id, GLuint stream, GLsizei primcount); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDPROC) (GLenum mode, GLuint id, GLsizei primcount); +typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC) (GLenum mode, GLuint id, GLuint stream, GLsizei primcount); +#endif + +#ifndef GL_ARB_compressed_texture_pixel_storage +#define GL_ARB_compressed_texture_pixel_storage 1 +#endif + +#ifndef GL_ARB_conservative_depth +#define GL_ARB_conservative_depth 1 +#endif + +#ifndef GL_ARB_internalformat_query +#define GL_ARB_internalformat_query 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glGetInternalformativ (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params); +#endif /* GL_GLEXT_PROTOTYPES */ 
+typedef void (APIENTRYP PFNGLGETINTERNALFORMATIVPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params); +#endif + +#ifndef GL_ARB_map_buffer_alignment +#define GL_ARB_map_buffer_alignment 1 +#endif + +#ifndef GL_ARB_shader_atomic_counters +#define GL_ARB_shader_atomic_counters 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glGetActiveAtomicCounterBufferiv (GLuint program, GLuint bufferIndex, GLenum pname, GLint *params); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLGETACTIVEATOMICCOUNTERBUFFERIVPROC) (GLuint program, GLuint bufferIndex, GLenum pname, GLint *params); +#endif + +#ifndef GL_ARB_shader_image_load_store +#define GL_ARB_shader_image_load_store 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glBindImageTexture (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format); +GLAPI void APIENTRY glMemoryBarrier (GLbitfield barriers); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format); +typedef void (APIENTRYP PFNGLMEMORYBARRIERPROC) (GLbitfield barriers); +#endif + +#ifndef GL_ARB_shading_language_packing +#define GL_ARB_shading_language_packing 1 +#endif + +#ifndef GL_ARB_texture_storage +#define GL_ARB_texture_storage 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glTexStorage1D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GLAPI void APIENTRY glTexStorage2D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI void APIENTRY glTexStorage3D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GLAPI void APIENTRY glTextureStorage1DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GLAPI void APIENTRY glTextureStorage2DEXT (GLuint texture, GLenum target, 
GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI void APIENTRY glTextureStorage3DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLTEXSTORAGE1DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (APIENTRYP PFNGLTEXSTORAGE2DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLTEXSTORAGE3DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +typedef void (APIENTRYP PFNGLTEXTURESTORAGE1DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +#endif + #ifndef GL_EXT_abgr #define GL_EXT_abgr 1 #endif From ffb7d02154186402f64e0b628998485309774bb8 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 7 Aug 2011 14:15:35 -0500 Subject: [PATCH 257/600] st/mesa: inline st_prepare_fragment_program in st_translate_fragment_program This reverts an unnecessary part of commit 4683529048ee and fixes misrendering and an assertion failure in Cogs. Fixes freedesktop.org bug 39888. 
Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_program.c | 326 ++++++++++++++-------------- src/mesa/state_tracker/st_program.h | 15 -- 2 files changed, 162 insertions(+), 179 deletions(-) diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index ca01d2e1976..a4f47edfcd3 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -416,151 +416,6 @@ st_get_vp_variant(struct st_context *st, return vpv; } -/** - * Translate Mesa fragment shader attributes to TGSI attributes. - * \return GL_TRUE if color output should be written to all render targets, - * GL_FALSE if not - */ -GLboolean -st_prepare_fragment_program(struct gl_context *ctx, - struct st_fragment_program *stfp) -{ - GLuint attr; - const GLbitfield inputsRead = stfp->Base.Base.InputsRead; - GLboolean write_all = GL_FALSE; - - /* - * Convert Mesa program inputs to TGSI input register semantics. - */ - for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { - if (inputsRead & (1 << attr)) { - const GLuint slot = stfp->num_inputs++; - - stfp->input_to_index[attr] = slot; - - switch (attr) { - case FRAG_ATTRIB_WPOS: - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - stfp->input_semantic_index[slot] = 0; - stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_COL0: - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - stfp->input_semantic_index[slot] = 0; - stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_COL1: - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - stfp->input_semantic_index[slot] = 1; - stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_FOGC: - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG; - stfp->input_semantic_index[slot] = 0; - stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - case FRAG_ATTRIB_FACE: - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE; - stfp->input_semantic_index[slot] = 0; - 
stfp->interp_mode[slot] = TGSI_INTERPOLATE_CONSTANT; - break; - /* In most cases, there is nothing special about these - * inputs, so adopt a convention to use the generic - * semantic name and the mesa FRAG_ATTRIB_ number as the - * index. - * - * All that is required is that the vertex shader labels - * its own outputs similarly, and that the vertex shader - * generates at least every output required by the - * fragment shader plus fixed-function hardware (such as - * BFC). - * - * There is no requirement that semantic indexes start at - * zero or be restricted to a particular range -- nobody - * should be building tables based on semantic index. - */ - case FRAG_ATTRIB_PNTC: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - case FRAG_ATTRIB_VAR0: - default: - /* Actually, let's try and zero-base this just for - * readability of the generated TGSI. 
- */ - assert(attr >= FRAG_ATTRIB_TEX0); - stfp->input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0); - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - if (attr == FRAG_ATTRIB_PNTC) - stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; - else - stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - } - } - else { - stfp->input_to_index[attr] = -1; - } - } - - /* - * Semantics and mapping for outputs - */ - { - uint numColors = 0; - GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; - - /* if z is written, emit that first */ - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_POSITION; - stfp->output_semantic_index[stfp->num_outputs] = 0; - stfp->result_to_output[FRAG_RESULT_DEPTH] = stfp->num_outputs; - stfp->num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); - } - - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { - stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_STENCIL; - stfp->output_semantic_index[stfp->num_outputs] = 0; - stfp->result_to_output[FRAG_RESULT_STENCIL] = stfp->num_outputs; - stfp->num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); - } - - /* handle remaning outputs (color) */ - for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { - if (outputsWritten & BITFIELD64_BIT(attr)) { - switch (attr) { - case FRAG_RESULT_DEPTH: - case FRAG_RESULT_STENCIL: - /* handled above */ - assert(0); - break; - case FRAG_RESULT_COLOR: - write_all = GL_TRUE; /* fallthrough */ - default: - assert(attr == FRAG_RESULT_COLOR || - (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); - stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_COLOR; - stfp->output_semantic_index[stfp->num_outputs] = numColors; - stfp->result_to_output[attr] = stfp->num_outputs; - numColors++; - break; - } - - stfp->num_outputs++; - } - } - } - - return write_all; -} - /** * Translate a Mesa fragment shader into a TGSI shader using 
extra info in @@ -613,12 +468,155 @@ st_translate_fragment_program(struct st_context *st, if (!stfp->tgsi.tokens) { /* need to translate Mesa instructions to TGSI now */ + GLuint outputMapping[FRAG_RESULT_MAX]; + GLuint inputMapping[FRAG_ATTRIB_MAX]; + GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ + GLuint attr; + const GLbitfield inputsRead = stfp->Base.Base.InputsRead; struct ureg_program *ureg; - GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp); + + GLboolean write_all = GL_FALSE; + + ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + uint fs_num_inputs = 0; + + ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + uint fs_num_outputs = 0; if (!stfp->glsl_to_tgsi) _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); + /* + * Convert Mesa program inputs to TGSI input register semantics. + */ + for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { + if (inputsRead & (1 << attr)) { + const GLuint slot = fs_num_inputs++; + + inputMapping[attr] = slot; + + switch (attr) { + case FRAG_ATTRIB_WPOS: + input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL0: + input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL1: + input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + input_semantic_index[slot] = 1; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_FOGC: + input_semantic_name[slot] = TGSI_SEMANTIC_FOG; + input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + case FRAG_ATTRIB_FACE: + input_semantic_name[slot] = TGSI_SEMANTIC_FACE; + input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; + break; + /* In most cases, there is 
nothing special about these + * inputs, so adopt a convention to use the generic + * semantic name and the mesa FRAG_ATTRIB_ number as the + * index. + * + * All that is required is that the vertex shader labels + * its own outputs similarly, and that the vertex shader + * generates at least every output required by the + * fragment shader plus fixed-function hardware (such as + * BFC). + * + * There is no requirement that semantic indexes start at + * zero or be restricted to a particular range -- nobody + * should be building tables based on semantic index. + */ + case FRAG_ATTRIB_PNTC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + case FRAG_ATTRIB_VAR0: + default: + /* Actually, let's try and zero-base this just for + * readability of the generated TGSI. + */ + assert(attr >= FRAG_ATTRIB_TEX0); + input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0); + input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + if (attr == FRAG_ATTRIB_PNTC) + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + else + interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + } + } + else { + inputMapping[attr] = -1; + } + } + + /* + * Semantics and mapping for outputs + */ + { + uint numColors = 0; + GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; + + /* if z is written, emit that first */ + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION; + fs_output_semantic_index[fs_num_outputs] = 0; + outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs; + fs_num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); + } + + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { + fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL; + fs_output_semantic_index[fs_num_outputs] = 0; + outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs; + 
fs_num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); + } + + /* handle remaning outputs (color) */ + for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { + if (outputsWritten & BITFIELD64_BIT(attr)) { + switch (attr) { + case FRAG_RESULT_DEPTH: + case FRAG_RESULT_STENCIL: + /* handled above */ + assert(0); + break; + case FRAG_RESULT_COLOR: + write_all = GL_TRUE; /* fallthrough */ + default: + assert(attr == FRAG_RESULT_COLOR || + (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); + fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; + fs_output_semantic_index[fs_num_outputs] = numColors; + outputMapping[attr] = fs_num_outputs; + numColors++; + break; + } + + fs_num_outputs++; + } + } + } + ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) return NULL; @@ -638,32 +636,32 @@ st_translate_fragment_program(struct st_context *st, stfp->glsl_to_tgsi, &stfp->Base.Base, /* inputs */ - stfp->num_inputs, - stfp->input_to_index, - stfp->input_semantic_name, - stfp->input_semantic_index, - stfp->interp_mode, + fs_num_inputs, + inputMapping, + input_semantic_name, + input_semantic_index, + interpMode, /* outputs */ - stfp->num_outputs, - stfp->result_to_output, - stfp->output_semantic_name, - stfp->output_semantic_index, FALSE ); + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, FALSE ); else st_translate_mesa_program(st->ctx, TGSI_PROCESSOR_FRAGMENT, ureg, &stfp->Base.Base, /* inputs */ - stfp->num_inputs, - stfp->input_to_index, - stfp->input_semantic_name, - stfp->input_semantic_index, - stfp->interp_mode, + fs_num_inputs, + inputMapping, + input_semantic_name, + input_semantic_index, + interpMode, /* outputs */ - stfp->num_outputs, - stfp->result_to_output, - stfp->output_semantic_name, - stfp->output_semantic_index, FALSE ); + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, FALSE ); stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL ); ureg_destroy( ureg 
); diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 67723de6d53..699b6e8ccb7 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -85,21 +85,6 @@ struct st_fragment_program { struct gl_fragment_program Base; struct glsl_to_tgsi_visitor* glsl_to_tgsi; - - /** maps a Mesa FRAG_ATTRIB_x to a packed TGSI input index */ - GLuint input_to_index[FRAG_ATTRIB_MAX]; - /** maps a TGSI input index back to a Mesa FRAG_ATTRIB_x */ - GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; - ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - GLuint num_inputs; - GLuint interp_mode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ - - /** Maps FRAG_RESULT_x to slot */ - GLuint result_to_output[FRAG_RESULT_MAX]; - ubyte output_semantic_name[FRAG_RESULT_MAX]; - ubyte output_semantic_index[FRAG_RESULT_MAX]; - GLuint num_outputs; struct pipe_shader_state tgsi; From 482338842db6ad387316b52fbe9602eee56ad082 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Mon, 1 Aug 2011 13:06:06 -0700 Subject: [PATCH 258/600] Revert "glsl: Skip processing the first function's body in do_dead_functions()." opt_dead_functions contained a shortcut to skip processing the first function's body, based on the assumption that IR functions are topologically sorted, with callees always coming before their callers (therefore the first function cannot contain any calls). This assumption turns out not to be true in general. For example, the following code snippet gets translated to IR that violates this assumption: void f(); void g(); void f() { g(); } void g() { ... } In practice, the shortcut didn't cause bugs because of a coincidence of the circumstances in which opt_dead_functions is called: (a) we do inlining right before dead function elimination, and inlining (when successful) eliminates all calls. 
(b) for user-defined functions, inlining is always successful, because previous optimization passes (during compilation) have reduced them to a form that is eligible for inlining. (c) the function that appears first in the IR can't possibly call a built-in function, because built-in functions are always emitted before the function that calls them. It seems unnecessarily fragile to have opt_dead_functions depend on these coincidences. And the next patch in this series will break (c). So I'm reverting the shortcut. The consequence will be a slight increase in link time for complex shaders. This reverts commit c75427f4c8767e131e5fb3de44fbc9d904cb992d. Reviewed-by: Kenneth Graunke --- src/glsl/opt_dead_functions.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/glsl/opt_dead_functions.cpp b/src/glsl/opt_dead_functions.cpp index 7c64c618c0c..51c77e3b947 100644 --- a/src/glsl/opt_dead_functions.cpp +++ b/src/glsl/opt_dead_functions.cpp @@ -50,7 +50,6 @@ public: ir_dead_functions_visitor() { this->mem_ctx = ralloc_context(NULL); - this->seen_another_function_signature = false; } ~ir_dead_functions_visitor() @@ -65,8 +64,6 @@ public: bool (*predicate)(ir_instruction *ir); - bool seen_another_function_signature; - /* List of signature_entry */ exec_list signature_list; void *mem_ctx; @@ -97,13 +94,7 @@ ir_dead_functions_visitor::visit_enter(ir_function_signature *ir) entry->used = true; } - /* If this is the first signature to look at, no need to descend to see - * if it has calls to another function signature. - */ - if (!this->seen_another_function_signature) { - this->seen_another_function_signature = true; - return visit_continue_with_parent; - } + return visit_continue; } From 0d81b0e18494a80c4326fbc98837842959675869 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 29 Jul 2011 15:28:52 -0700 Subject: [PATCH 259/600] glsl: Emit function signatures at toplevel, even for built-ins. 
The ast-to-hir conversion needs to emit function signatures in two circumstances: when a function declaration (or definition) is encountered, and when a built-in function is encountered. To avoid emitting a function signature in an illegal place (such as inside a function), emit_function() checked whether we were inside a function definition, and if so, emitted the signature before the function definition. However, this didn't cover the case of emitting function signatures for built-in functions when those built-in functions are called from inside the constant integer expression that specifies the length of a global array. This failed because when processing an array length, we are emitting IR into a dummy exec_list (see process_array_type() in ast_to_hir.cpp). process_array_type() later checks (via an assertion) that no instructions were emitted to the dummy exec_list, based on the reasonable assumption that we shouldn't need to emit instructions to calculate the value of a constant. This patch changes emit_function() so that it emits function signatures at toplevel in all cases. This partially fixes bug 38625 (https://bugs.freedesktop.org/show_bug.cgi?id=38625). The remainder of the fix is in the patch that follows. 
Reviewed-by: Kenneth Graunke --- src/glsl/ast.h | 3 +-- src/glsl/ast_function.cpp | 2 +- src/glsl/ast_to_hir.cpp | 31 ++++++++++++++----------------- src/glsl/glsl_parser_extras.h | 6 ++++++ 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/glsl/ast.h b/src/glsl/ast.h index 878f48b2070..d1de2271873 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -730,7 +730,6 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr, struct _mesa_glsl_parse_state *state); void -emit_function(_mesa_glsl_parse_state *state, exec_list *instructions, - ir_function *f); +emit_function(_mesa_glsl_parse_state *state, ir_function *f); #endif /* AST_H */ diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 8bcf48dfd91..34a82f8ab75 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -125,7 +125,7 @@ match_function_by_name(exec_list *instructions, const char *name, if (f == NULL) { f = new(ctx) ir_function(name); state->symbols->add_global_function(f); - emit_function(state, instructions, f); + emit_function(state, f); } f->add_signature(sig->clone_prototype(f, NULL)); diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 7da14611950..a6a0c328314 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -66,6 +66,8 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) state->current_function = NULL; + state->toplevel_ir = instructions; + /* Section 4.2 of the GLSL 1.20 specification states: * "The built-in functions are scoped in a scope outside the global scope * users declare global variables in. 
That is, a shader's global scope, @@ -85,6 +87,8 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) ast->hir(instructions, state); detect_recursion_unlinked(state, instructions); + + state->toplevel_ir = NULL; } @@ -2926,23 +2930,16 @@ ast_parameter_declarator::parameters_to_hir(exec_list *ast_parameters, void -emit_function(_mesa_glsl_parse_state *state, exec_list *instructions, - ir_function *f) +emit_function(_mesa_glsl_parse_state *state, ir_function *f) { - /* Emit the new function header */ - if (state->current_function == NULL) { - instructions->push_tail(f); - } else { - /* IR invariants disallow function declarations or definitions nested - * within other function definitions. Insert the new ir_function - * block in the instruction sequence before the ir_function block - * containing the current ir_function_signature. - */ - ir_function *const curr = - const_cast(state->current_function->function()); - - curr->insert_before(f); - } + /* IR invariants disallow function declarations or definitions + * nested within other function definitions. But there is no + * requirement about the relative order of function declarations + * and definitions with respect to one another. So simply insert + * the new ir_function block at the end of the toplevel instruction + * list. + */ + state->toplevel_ir->push_tail(f); } @@ -3069,7 +3066,7 @@ ast_function::hir(exec_list *instructions, return NULL; } - emit_function(state, instructions, f); + emit_function(state, f); } /* Verify the return type of main() */ diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 2f4d3cba77f..fc392da5b21 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -129,6 +129,12 @@ struct _mesa_glsl_parse_state { */ class ir_function_signature *current_function; + /** + * During AST to IR conversion, pointer to the toplevel IR + * instruction list being generated. 
+ */ + exec_list *toplevel_ir; + /** Have we found a return statement in this function? */ bool found_return; From 789ee6516bfca289e1948ff8f2c147b94286a0e0 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Sat, 30 Jul 2011 11:55:53 -0700 Subject: [PATCH 260/600] glsl: Constant-fold built-in functions before outputting IR Rearranged the logic for converting the ast for a function call to hir, so that we constant fold before emitting any IR. Previously we would emit some IR, and then only later detect whether we could constant fold. The unnecessary IR would usually get cleaned up by a later optimization step, however in the case of a builtin function being used to compute an array size, it was causing an assertion. Fixes Piglit test array-size-constant-relational.vert. Reviewed-by: Kenneth Graunke Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38625 --- src/glsl/ast_function.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 34a82f8ab75..5b6ed3bc8f5 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -199,6 +199,20 @@ match_function_by_name(exec_list *instructions, const char *name, */ ir_call *call = new(ctx) ir_call(sig, actual_parameters); if (!sig->return_type->is_void()) { + /* If the function call is a constant expression, don't + * generate the instructions to call it; just generate an + * ir_constant representing the constant value. + * + * Function calls can only be constant expressions starting + * in GLSL 1.20. 
+ */ + if (state->language_version >= 120) { + ir_constant *const_val = call->constant_expression_value(); + if (const_val) { + return const_val; + } + } + ir_variable *var; ir_dereference_variable *deref; @@ -211,8 +225,6 @@ match_function_by_name(exec_list *instructions, const char *name, deref = new(ctx) ir_dereference_variable(var); ir_assignment *assign = new(ctx) ir_assignment(deref, call, NULL); instructions->push_tail(assign); - if (state->language_version >= 120) - var->constant_value = call->constant_expression_value(); deref = new(ctx) ir_dereference_variable(var); return deref; From d4144a123b603d3c33cb356cf3c8e5ae4653594e Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Mon, 1 Aug 2011 15:23:07 -0700 Subject: [PATCH 261/600] glsl: Check array size is const before asserting that no IR was generated. process_array_type() contains an assertion to verify that no IR instructions are generated while processing the expression that specifies the size of the array. This assertion needs to happen _after_ checking whether the expression is constant. Otherwise we may crash on an illegal shader rather than reporting an error. Fixes piglit tests array-size-non-builtin-function.vert and array-size-with-side-effect.vert. Reviewed-by: Kenneth Graunke --- src/glsl/ast_to_hir.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index a6a0c328314..2025911acd3 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -1769,11 +1769,6 @@ process_array_type(YYLTYPE *loc, const glsl_type *base, ast_node *array_size, ir_rvalue *const ir = array_size->hir(& dummy_instructions, state); YYLTYPE loc = array_size->get_location(); - /* FINISHME: Verify that the grammar forbids side-effects in array - * FINISHME: sizes. 
i.e., 'vec4 [x = 12] data' - */ - assert(dummy_instructions.is_empty()); - if (ir != NULL) { if (!ir->type->is_integer()) { _mesa_glsl_error(& loc, state, "array size must be integer type"); @@ -1790,6 +1785,14 @@ process_array_type(YYLTYPE *loc, const glsl_type *base, ast_node *array_size, } else { assert(size->type == ir->type); length = size->value.u[0]; + + /* If the array size is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the array size isn't const after all, or + * we are emitting unnecessary instructions. + */ + assert(dummy_instructions.is_empty()); } } } From 01a851c296347d8e9d2166b3c83eab97404c0670 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 3 Aug 2011 16:16:59 -0700 Subject: [PATCH 262/600] glsl: When linking, emit functions at the tail of the final linked program. When link_functions.cpp adds a new function to the final linked program, it needs to add it after any global variable declarations that the function refers to, otherwise the IR will be invalid (because variable declarations must occur before variable accesses). The easiest way to do that is to have the linker emit functions to the tail of the final linked program. The linker used to emit functions to the head of the final linked program, in an effort to keep callees sorted before their callers. However, this was not reliable: it didn't work for functions declared or defined in the same compilation unit as main, for diamond-shaped patterns in the call graph, or for some obscure cases involving overloaded functions. And no code currently relies on this sort order. No Piglit regressions with i965 Ironlake. 
Reviewed-by: Kenneth Graunke --- src/glsl/link_functions.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/glsl/link_functions.cpp b/src/glsl/link_functions.cpp index d40f771e342..acee3271249 100644 --- a/src/glsl/link_functions.cpp +++ b/src/glsl/link_functions.cpp @@ -104,10 +104,12 @@ public: if (f == NULL) { f = new(linked) ir_function(name); - /* Add the new function to the linked IR. + /* Add the new function to the linked IR. Put it at the end + * so that it comes after any global variable declarations + * that it refers to. */ linked->symbols->add_function(f); - linked->ir->push_head(f); + linked->ir->push_tail(f); } ir_function_signature *linked_sig = From c148ef6ddb3dbf256c26d82ed2f45f1fde55a231 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 3 Aug 2011 15:37:01 -0700 Subject: [PATCH 263/600] glsl: validate IR after linking (debug builds only) At least one of the invariants verified by IR validation concerns the relative ordering of toplevel constructs in the IR: references to global variables must come after the declarations of those global variables. Since linking affects the ordering of toplevel constructs in the IR, it's possible that a bug in the linker will cause invalid IR to be generated, even if all the pre-linked shaders are valid. (In fact, such a bug was fixed by the previous commit.) Bugs like this are easily masked by further optimization passes, particularly inlining. So to make them easier to track down, this patch adds an IR validation step right after linking, and before final optimization occurs. The validation only occurs on debug builds.
Reviewed-by: Kenneth Graunke --- src/glsl/linker.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 19eb9b5ff6f..b54ef41080a 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -922,6 +922,14 @@ link_intrastage_shaders(void *mem_ctx, free(linking_shaders); +#ifdef DEBUG + /* At this point linked should contain all of the linked IR, so + * validate it to make sure nothing went wrong. + */ + if (linked) + validate_ir_tree(linked->ir); +#endif + /* Make a pass over all variable declarations to ensure that arrays with * unspecified sizes have a size specified. The size is inferred from the * max_array_access field. From 36291173c20b7b90da8e765871efb37205786922 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 9 Aug 2011 10:39:52 +0100 Subject: [PATCH 264/600] docs: update GL3.txt with new GL 4.2 extensions --- docs/GL3.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/GL3.txt b/docs/GL3.txt index 135bc4bab67..c0cc4d172e0 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -114,6 +114,19 @@ GL_ARB_vertex_attrib_64bit not started GL_ARB_viewport_array not started +GL 4.2: +GLSL 4.2 not started +GL_ARB_texture_compression_bptc not started +GL_ARB_compressed_texture_pixel_storage not started +GL_ARB_shader_atomic_counters not started +GL_ARB_texture_storage not started +GL_ARB_transform_feedback_instanced not started +GL_ARB_base_instance not started +GL_ARB_shader_image_load_store not started +GL_ARB_conservative_depth not started (may be close to AMD_conservative_depth though) +GL_ARB_shading_language_420pack not started +GL_ARB_internalformat_query not started +GL_ARB_map_buffer_alignment not started More info about these features and the work involved can be found at From afd1d857752b5c30a3082068f8bb9002e0c69699 Mon Sep 17 00:00:00 2001 From: Fabio Pedretti Date: Tue, 9 Aug 2011 08:08:59 -0600 Subject: [PATCH 265/600] swrast: silence unused var warnings Signed-off-by: Brian 
Paul --- src/mesa/swrast/s_span.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index db102ac7946..9a91be39970 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -212,10 +212,10 @@ interpolate_active_attribs(struct gl_context *ctx, SWspan *span, GLbitfield attr static INLINE void interpolate_int_colors(struct gl_context *ctx, SWspan *span) { +#if CHAN_BITS != 32 const GLuint n = span->end; GLuint i; -#if CHAN_BITS != 32 ASSERT(!(span->arrayMask & SPAN_RGBA)); #endif From e0496b63ff0d41a36812b78e9062e92590fcdd55 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 9 Aug 2011 08:58:20 -0600 Subject: [PATCH 266/600] glx: move declarations before code --- src/glx/dri2_glx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c index 80e4da30beb..d9524d765bd 100644 --- a/src/glx/dri2_glx.c +++ b/src/glx/dri2_glx.c @@ -455,16 +455,20 @@ dri2_wait_gl(struct glx_context *gc) static void dri2FlushFrontBuffer(__DRIdrawable *driDrawable, void *loaderPrivate) { + struct glx_display *priv; + struct dri2_display *pdp; + struct glx_context *gc; struct dri2_drawable *pdraw = loaderPrivate; + if (!pdraw) return; if (!pdraw->base.psc) return; - struct glx_display *priv = __glXInitialize(pdraw->base.psc->dpy); - struct dri2_display *pdp = (struct dri2_display *)priv->dri2Display; - struct glx_context *gc = __glXGetCurrentContext(); + priv = __glXInitialize(pdraw->base.psc->dpy); + pdp = (struct dri2_display *) priv->dri2Display; + gc = __glXGetCurrentContext(); /* Old servers don't send invalidate events */ if (!pdp->invalidateAvailable) From 971905bf394e4a6342c206d170abd6661ea38e21 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 9 Aug 2011 08:58:47 -0600 Subject: [PATCH 267/600] svga: add missing switch case for PIPE_SHADER_CAP_INTEGERS --- src/gallium/drivers/svga/svga_screen.c | 2 ++ 1 file changed, 2 insertions(+) diff 
--git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 4be10ef5821..e0c11590df0 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -245,6 +245,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 0; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; } break; case PIPE_SHADER_VERTEX: From 32faaea743ca74f4ba29184ef44ebf2c0e962a46 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 9 Aug 2011 09:00:29 -0600 Subject: [PATCH 268/600] r300g: silence some warnings about uninitialized variables --- src/gallium/drivers/r300/compiler/radeon_program_alu.c | 2 +- src/gallium/drivers/r300/compiler/radeon_program_tex.c | 4 ++-- src/gallium/drivers/r300/r300_blit.c | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c index 9fc991166a3..e273bc40c26 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c @@ -87,7 +87,7 @@ static struct rc_instruction *emit3( static struct rc_dst_register dstregtmpmask(int index, int mask) { - struct rc_dst_register dst = {0}; + struct rc_dst_register dst = {0, 0, 0}; dst.File = RC_FILE_TEMPORARY; dst.Index = index; dst.WriteMask = mask; diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.c b/src/gallium/drivers/r300/compiler/radeon_program_tex.c index 8d16b2cf9ec..9d69ebd18fb 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_tex.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.c @@ -35,7 +35,7 @@ static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler, int tmu) { - struct rc_src_register reg = { 0, }; + struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 }; if (compiler->enable_shadow_ambient) { reg.File = 
RC_FILE_CONSTANT; @@ -55,7 +55,7 @@ static struct rc_src_register shadow_fail_value(struct r300_fragment_program_com static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler, int tmu) { - struct rc_src_register reg = { 0, }; + struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 }; reg.File = RC_FILE_NONE; reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index db97e496e19..e7a926829d1 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -409,10 +409,11 @@ void r300_decompress_zmask(struct r300_context *r300) void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) { - struct pipe_framebuffer_state fb = {0}; + struct pipe_framebuffer_state fb; + + memset(&fb, 0, sizeof(fb)); fb.width = r300->locked_zbuffer->width; fb.height = r300->locked_zbuffer->height; - fb.nr_cbufs = 0; fb.zsbuf = r300->locked_zbuffer; r300->context.set_framebuffer_state(&r300->context, &fb); @@ -421,8 +422,9 @@ void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) void r300_decompress_zmask_locked(struct r300_context *r300) { - struct pipe_framebuffer_state saved_fb = {0}; + struct pipe_framebuffer_state saved_fb; + memset(&saved_fb, 0, sizeof(saved_fb)); util_copy_framebuffer_state(&saved_fb, r300->fb_state.state); r300_decompress_zmask_locked_unsafe(r300); r300->context.set_framebuffer_state(&r300->context, &saved_fb); From e6c64800cc8833fb4083a556c839b51e8ac84a8b Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Tue, 9 Aug 2011 12:23:47 -0500 Subject: [PATCH 269/600] glsl_to_tgsi: improve assignment hack Fixes StarCraft 2 and Fallout 3 in Wine. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d7a1ba80e1d..aef23e7d207 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1994,15 +1994,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } else if (ir->rhs->as_expression() && this->instructions.get_tail() && ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && - type_size(ir->lhs->type) == 1) { + type_size(ir->lhs->type) == 1 && + l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { /* To avoid emitting an extra MOV when assigning an expression to a * variable, emit the last instruction of the expression again, but * replace the destination register with the target of the assignment. * Dead code elimination will remove the original instruction. */ - glsl_to_tgsi_instruction *inst; + glsl_to_tgsi_instruction *inst, *new_inst; inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst->saturate = inst->saturate; } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); From fa43477fa33c068915283d511b64e3d6470ccd73 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 May 2011 13:27:33 -0700 Subject: [PATCH 270/600] mesa: Add a convenience interface for register allocator conflicts setup. 
--- src/mesa/program/register_allocate.c | 21 +++++++++++++++++++++ src/mesa/program/register_allocate.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c index de96eb42c9b..f5b5174fc18 100644 --- a/src/mesa/program/register_allocate.c +++ b/src/mesa/program/register_allocate.c @@ -200,6 +200,27 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) } } +/** + * Adds a conflict between base_reg and reg, and also between reg and + * anything that base_reg conflicts with. + * + * This can simplify code for setting up multiple register classes + * which are aggregates of some base hardware registers, compared to + * explicitly using ra_add_reg_conflict. + */ +void +ra_add_transitive_reg_conflict(struct ra_regs *regs, + unsigned int base_reg, unsigned int reg) +{ + int i; + + ra_add_reg_conflict(regs, reg, base_reg); + + for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) { + ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]); + } +} + unsigned int ra_alloc_reg_class(struct ra_regs *regs) { diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h index 5b95833f394..ee2e58a4756 100644 --- a/src/mesa/program/register_allocate.h +++ b/src/mesa/program/register_allocate.h @@ -40,6 +40,8 @@ struct ra_regs *ra_alloc_reg_set(unsigned int count); unsigned int ra_alloc_reg_class(struct ra_regs *regs); void ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2); +void ra_add_transitive_reg_conflict(struct ra_regs *regs, + unsigned int base_reg, unsigned int reg); void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg); void ra_set_finalize(struct ra_regs *regs); /** @} */ From bbcf13adbe525bd389a65ba15dd7831a56b8b13c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 May 2011 13:31:01 -0700 Subject: [PATCH 271/600] i965/fs: Use the new convenience interface for setting up 
reg conflicts. That code I wrote was impenetrable, and hard to write the first time. This makes things a lot more obvious. --- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 27 +++++-------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 78daa491156..f246ac49660 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -157,29 +157,14 @@ fs_visitor::assign_regs() classes[i] = ra_alloc_reg_class(regs); for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); - } + int class_reg = class_base_reg[i] + i_r; - /* Add conflicts between our contiguous registers aliasing - * base regs and other register classes' contiguous registers - * that alias base regs, or the base regs themselves for classes[0]. - */ - for (int c = 0; c <= i; c++) { - for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); - c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]); - c_r++) { + ra_class_add_reg(regs, classes[i], class_reg); - if (0) { - printf("%d/%d conflicts %d/%d\n", - class_sizes[i], first_assigned_grf + i_r, - class_sizes[c], first_assigned_grf + c_r); - } - - ra_add_reg_conflict(regs, - class_base_reg[i] + i_r, - class_base_reg[c] + c_r); - } + for (int base_reg = i_r; + base_reg < i_r + class_sizes[i]; + base_reg++) { + ra_add_transitive_reg_conflict(regs, base_reg, class_reg); } } } From b76378d46a211521582cfab56dc05031a57502a6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 May 2011 13:50:13 -0700 Subject: [PATCH 272/600] i965/fs: Eliminate the magic nature of virtual GRF 0. This was a debugging aid at one point -- virtual grf 0 should never be allocated, and it would be used if undefined register access occurred in codegen. 
However, it made the confusing register allocation code even more confusing by indexing things off of 1 all over. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 9 ++--- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 33 +++++++------------ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +-- 4 files changed, 17 insertions(+), 31 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f55be022f72..d57a67cc4fc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -181,9 +181,6 @@ fs_visitor::virtual_grf_alloc(int size) virtual_grf_array_size *= 2; virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, virtual_grf_array_size); - - /* This slot is always unused. */ - virtual_grf_sizes[0] = 0; } virtual_grf_sizes[virtual_grf_next] = size; return virtual_grf_next++; @@ -985,7 +982,7 @@ fs_visitor::calculate_live_intervals() } } else { for (unsigned int i = 0; i < 3; i++) { - if (inst->src[i].file == GRF && inst->src[i].reg != 0) { + if (inst->src[i].file == GRF) { int reg = inst->src[i].reg; if (!loop_depth) { @@ -1001,7 +998,7 @@ fs_visitor::calculate_live_intervals() } } } - if (inst->dst.file == GRF && inst->dst.reg != 0) { + if (inst->dst.file == GRF) { int reg = inst->dst.reg; if (!loop_depth) { @@ -1715,7 +1712,7 @@ fs_visitor::run() if (0) { /* Debug of register spilling: Go spill everything. 
*/ int virtual_grf_count = virtual_grf_next; - for (int i = 1; i < virtual_grf_count; i++) { + for (int i = 0; i < virtual_grf_count; i++) { spill_reg(i); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 96e1420038f..0375f672bec 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -402,7 +402,7 @@ public: this->base_ir = NULL; this->virtual_grf_sizes = NULL; - this->virtual_grf_next = 1; + this->virtual_grf_next = 0; this->virtual_grf_array_size = 0; this->virtual_grf_def = NULL; this->virtual_grf_use = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index f246ac49660..83dd629aafb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -50,7 +50,7 @@ extern "C" { static void assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) { - if (reg->file == GRF && reg->reg != 0) { + if (reg->file == GRF) { assert(reg->reg_offset >= 0); reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; reg->reg = 0; @@ -60,20 +60,17 @@ assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) void fs_visitor::assign_regs_trivial() { - int last_grf = 0; - int hw_reg_mapping[this->virtual_grf_next]; + int hw_reg_mapping[this->virtual_grf_next + 1]; int i; int reg_width = c->dispatch_width / 8; - hw_reg_mapping[0] = 0; /* Note that compressed instructions require alignment to 2 registers. 
*/ - hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width); - for (i = 2; i < this->virtual_grf_next; i++) { + hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width); + for (i = 1; i <= this->virtual_grf_next; i++) { hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1] * reg_width); } - last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] * - reg_width); + this->grf_used = hw_reg_mapping[this->virtual_grf_next]; foreach_list(node, &this->instructions) { fs_inst *inst = (fs_inst *)node; @@ -83,12 +80,11 @@ fs_visitor::assign_regs_trivial() assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - if (last_grf >= BRW_MAX_GRF) { + if (this->grf_used >= BRW_MAX_GRF) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - last_grf, BRW_MAX_GRF); + this->grf_used, BRW_MAX_GRF); } - this->grf_used = last_grf + reg_width; } bool @@ -101,7 +97,7 @@ fs_visitor::assign_regs() * for reg_width == 2. */ int reg_width = c->dispatch_width / 8; - int hw_reg_mapping[this->virtual_grf_next + 1]; + int hw_reg_mapping[this->virtual_grf_next]; int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; int class_sizes[base_reg_count]; @@ -125,7 +121,7 @@ fs_visitor::assign_regs() */ class_sizes[class_count++] = 2; } - for (int r = 1; r < this->virtual_grf_next; r++) { + for (int r = 0; r < this->virtual_grf_next; r++) { int i; for (i = 0; i < class_count; i++) { @@ -195,12 +191,8 @@ fs_visitor::assign_regs() struct ra_graph *g = ra_alloc_interference_graph(regs, this->virtual_grf_next); - /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 - * with nodes. 
- */ - ra_set_node_class(g, 0, classes[0]); - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { if (aligned_pair_class >= 0 && @@ -213,7 +205,7 @@ fs_visitor::assign_regs() } } - for (int j = 1; j < i; j++) { + for (int j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); } @@ -248,8 +240,7 @@ fs_visitor::assign_regs() * numbers. */ this->grf_used = first_assigned_grf; - hw_reg_mapping[0] = 0; /* unused */ - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); int hw_reg = -1; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2b769ccbba1..2e3f9be75b4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -142,9 +142,7 @@ fs_visitor::visit(ir_dereference_array *ir) this->result.type = brw_type_for_base_type(ir->type); if (index) { - assert(this->result.file == UNIFORM || - (this->result.file == GRF && - this->result.reg != 0)); + assert(this->result.file == UNIFORM || this->result.file == GRF); this->result.reg_offset += index->value.i[0] * element_size; } else { assert(!"FINISHME: non-constant array element"); From 4e10d5825b31d2c58c0af3e29b7fc2eacb2b4709 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 5 May 2011 19:37:10 -0700 Subject: [PATCH 273/600] i965/fs: Simplify the register allocator using a map from RA reg to GRF. It's fewer pointers to track, and when we start caching the register set, should be algorithmically better in the cache hit case (lookup in a byte-per-register array, instead of a linear walk through description of register classes to find how to translate that class).
--- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 77 +++++++++---------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 83dd629aafb..42ab66df6d8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -102,7 +102,7 @@ fs_visitor::assign_regs() int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; int class_sizes[base_reg_count]; int class_count = 0; - int aligned_pair_class = -1; + int aligned_pairs_class = -1; calculate_live_intervals(); @@ -137,52 +137,59 @@ fs_visitor::assign_regs() } } + /* Compute the total number of registers across all classes. */ int ra_reg_count = 0; - int class_base_reg[class_count]; - int class_reg_count[class_count]; - int classes[class_count + 1]; - for (int i = 0; i < class_count; i++) { - class_base_reg[i] = ra_reg_count; - class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); - ra_reg_count += class_reg_count[i]; + ra_reg_count += base_reg_count - (class_sizes[i] - 1); } struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); + uint8_t ra_reg_to_grf[ra_reg_count]; + int classes[class_count + 1]; + + /* Now, add the registers to their classes, and add the conflicts + * between them and the base GRF registers (and also each other). + */ + int reg = 0; + int pairs_base_reg = 0; + int pairs_reg_count = 0; for (int i = 0; i < class_count; i++) { + int class_reg_count = base_reg_count - (class_sizes[i] - 1); classes[i] = ra_alloc_reg_class(regs); - for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - int class_reg = class_base_reg[i] + i_r; + /* Save this off for the aligned pair class at the end. 
*/ + if (class_sizes[i] == 2) { + pairs_base_reg = reg; + pairs_reg_count = class_reg_count; + } - ra_class_add_reg(regs, classes[i], class_reg); + for (int j = 0; j < class_reg_count; j++) { + ra_class_add_reg(regs, classes[i], reg); - for (int base_reg = i_r; - base_reg < i_r + class_sizes[i]; + ra_reg_to_grf[reg] = j; + + for (int base_reg = j; + base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(regs, base_reg, class_reg); + ra_add_transitive_reg_conflict(regs, base_reg, reg); } + + reg++; } } + assert(reg == ra_reg_count); /* Add a special class for aligned pairs, which we'll put delta_x/y * in on gen5 so that we can do PLN. */ if (brw->has_pln && reg_width == 1 && intel->gen < 6) { - int reg_count = (base_reg_count - 1) / 2; - int unaligned_pair_class = 1; - assert(class_sizes[unaligned_pair_class] == 2); + aligned_pairs_class = ra_alloc_reg_class(regs); - aligned_pair_class = class_count; - classes[aligned_pair_class] = ra_alloc_reg_class(regs); - class_sizes[aligned_pair_class] = 2; - class_base_reg[aligned_pair_class] = 0; - class_reg_count[aligned_pair_class] = 0; - int start = (first_assigned_grf & 1) ? 
1 : 0; - - for (int i = 0; i < reg_count; i++) { - ra_class_add_reg(regs, classes[aligned_pair_class], - class_base_reg[unaligned_pair_class] + i * 2 + start); + for (int i = 0; i < pairs_reg_count; i++) { + if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { + ra_class_add_reg(regs, aligned_pairs_class, + pairs_base_reg + i); + } } class_count++; } @@ -195,9 +202,9 @@ fs_visitor::assign_regs() for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { - if (aligned_pair_class >= 0 && + if (aligned_pairs_class >= 0 && this->delta_x.reg == i) { - ra_set_node_class(g, i, classes[aligned_pair_class]); + ra_set_node_class(g, i, aligned_pairs_class); } else { ra_set_node_class(g, i, classes[c]); } @@ -242,18 +249,8 @@ fs_visitor::assign_regs() this->grf_used = first_assigned_grf; for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); - int hw_reg = -1; - for (int c = 0; c < class_count; c++) { - if (reg >= class_base_reg[c] && - reg < class_base_reg[c] + class_reg_count[c]) { - hw_reg = reg - class_base_reg[c]; - break; - } - } - - assert(hw_reg >= 0); - hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width; + hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width; this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->virtual_grf_sizes[i] * reg_width); From b1f0bffd399f377a19b0541e1d834afad8b9dad0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 9 May 2011 09:56:18 -0700 Subject: [PATCH 274/600] i965/fs: Factor out the register allocator setup to a separate function. Besides separating out a logical step of the giant register allocator function, this now communicates a bunch of the allocator information through entries in brw_context, which will make this code partially reusable for caching the expensive allocator setup. 
--- src/mesa/drivers/dri/i965/brw_context.h | 23 +++ .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 148 ++++++++++-------- 2 files changed, 105 insertions(+), 66 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 22baf978ad4..cc11d06874d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -748,6 +748,29 @@ struct brw_context * Pre-gen6, push constants live in the CURBE. */ uint32_t push_const_offset; + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** Array of the ra classes for the unaligned contiguous + * register block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + + /** @} */ } wm; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 42ab66df6d8..8e44a010576 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -87,6 +87,80 @@ fs_visitor::assign_regs_trivial() } +static void +brw_alloc_reg_set_for_classes(struct brw_context *brw, + int *class_sizes, + int class_count, + int reg_width, + int base_reg_count) +{ + struct intel_context *intel = &brw->intel; + + /* Compute the total number of registers across all classes. 
*/ + int ra_reg_count = 0; + for (int i = 0; i < class_count; i++) { + ra_reg_count += base_reg_count - (class_sizes[i] - 1); + } + + ralloc_free(brw->wm.ra_reg_to_grf); + brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); + ralloc_free(brw->wm.regs); + brw->wm.regs = ra_alloc_reg_set(ra_reg_count); + ralloc_free(brw->wm.classes); + brw->wm.classes = ralloc_array(brw, int, class_count + 1); + + brw->wm.aligned_pairs_class = -1; + + /* Now, add the registers to their classes, and add the conflicts + * between them and the base GRF registers (and also each other). + */ + int reg = 0; + int pairs_base_reg = 0; + int pairs_reg_count = 0; + for (int i = 0; i < class_count; i++) { + int class_reg_count = base_reg_count - (class_sizes[i] - 1); + brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs); + + /* Save this off for the aligned pair class at the end. */ + if (class_sizes[i] == 2) { + pairs_base_reg = reg; + pairs_reg_count = class_reg_count; + } + + for (int j = 0; j < class_reg_count; j++) { + ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg); + + brw->wm.ra_reg_to_grf[reg] = j; + + for (int base_reg = j; + base_reg < j + class_sizes[i]; + base_reg++) { + ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg); + } + + reg++; + } + } + assert(reg == ra_reg_count); + + /* Add a special class for aligned pairs, which we'll put delta_x/y + * in on gen5 so that we can do PLN. 
+ */ + if (brw->has_pln && reg_width == 1 && intel->gen < 6) { + brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs); + + for (int i = 0; i < pairs_reg_count; i++) { + if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { + ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class, + pairs_base_reg + i); + } + } + class_count++; + } + + ra_set_finalize(brw->wm.regs); +} + bool fs_visitor::assign_regs() { @@ -102,7 +176,6 @@ fs_visitor::assign_regs() int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; int class_sizes[base_reg_count]; int class_count = 0; - int aligned_pairs_class = -1; calculate_live_intervals(); @@ -137,76 +210,20 @@ fs_visitor::assign_regs() } } - /* Compute the total number of registers across all classes. */ - int ra_reg_count = 0; - for (int i = 0; i < class_count; i++) { - ra_reg_count += base_reg_count - (class_sizes[i] - 1); - } + brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, + reg_width, base_reg_count); - struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); - uint8_t ra_reg_to_grf[ra_reg_count]; - int classes[class_count + 1]; - - /* Now, add the registers to their classes, and add the conflicts - * between them and the base GRF registers (and also each other). - */ - int reg = 0; - int pairs_base_reg = 0; - int pairs_reg_count = 0; - for (int i = 0; i < class_count; i++) { - int class_reg_count = base_reg_count - (class_sizes[i] - 1); - classes[i] = ra_alloc_reg_class(regs); - - /* Save this off for the aligned pair class at the end. 
*/ - if (class_sizes[i] == 2) { - pairs_base_reg = reg; - pairs_reg_count = class_reg_count; - } - - for (int j = 0; j < class_reg_count; j++) { - ra_class_add_reg(regs, classes[i], reg); - - ra_reg_to_grf[reg] = j; - - for (int base_reg = j; - base_reg < j + class_sizes[i]; - base_reg++) { - ra_add_transitive_reg_conflict(regs, base_reg, reg); - } - - reg++; - } - } - assert(reg == ra_reg_count); - - /* Add a special class for aligned pairs, which we'll put delta_x/y - * in on gen5 so that we can do PLN. - */ - if (brw->has_pln && reg_width == 1 && intel->gen < 6) { - aligned_pairs_class = ra_alloc_reg_class(regs); - - for (int i = 0; i < pairs_reg_count; i++) { - if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { - ra_class_add_reg(regs, aligned_pairs_class, - pairs_base_reg + i); - } - } - class_count++; - } - - ra_set_finalize(regs); - - struct ra_graph *g = ra_alloc_interference_graph(regs, + struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs, this->virtual_grf_next); for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { - if (aligned_pairs_class >= 0 && + if (brw->wm.aligned_pairs_class >= 0 && this->delta_x.reg == i) { - ra_set_node_class(g, i, aligned_pairs_class); + ra_set_node_class(g, i, brw->wm.aligned_pairs_class); } else { - ra_set_node_class(g, i, classes[c]); + ra_set_node_class(g, i, brw->wm.classes[c]); } break; } @@ -237,7 +254,6 @@ fs_visitor::assign_regs() ralloc_free(g); - ralloc_free(regs); return false; } @@ -250,7 +266,8 @@ fs_visitor::assign_regs() for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); - hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width; + hw_reg_mapping[i] = (first_assigned_grf + + brw->wm.ra_reg_to_grf[reg] * reg_width); this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->virtual_grf_sizes[i] * reg_width); @@ -265,7 +282,6 @@ fs_visitor::assign_regs() } 
ralloc_free(g); - ralloc_free(regs); return true; } From c9e81fe14f36933617c862efb15ae09194485eab Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 15 May 2011 09:36:19 -0700 Subject: [PATCH 275/600] i965: Drop the reg/hw_reg distinction. "reg" was set in only one case, virtual GRFs pre register allocation, and would be unset and have hw_reg set after allocation. Since we never bothered with looking at virtual GRF number after allocation anyway, just use the same storage and avoid confusion. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 34 +++++++++---------- src/mesa/drivers/dri/i965/brw_fs.h | 21 +++++++----- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 6 ++-- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 +-- .../dri/i965/brw_fs_schedule_instructions.cpp | 8 ++--- 5 files changed, 37 insertions(+), 36 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index d57a67cc4fc..cafb7092ac8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -187,20 +187,20 @@ fs_visitor::virtual_grf_alloc(int size) } /** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int hw_reg) +fs_reg::fs_reg(enum register_file file, int reg) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = BRW_REGISTER_TYPE_F; } /** Fixed HW reg constructor. 
*/ -fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) +fs_reg::fs_reg(enum register_file file, int reg, uint32_t type) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = type; } @@ -636,7 +636,7 @@ fs_visitor::assign_curb_setup() for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == UNIFORM) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs + constant_nr / 8, constant_nr % 8); @@ -810,7 +810,7 @@ fs_visitor::remove_dead_constants() fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; if (inst->src[i].file != UNIFORM) continue; @@ -862,13 +862,13 @@ fs_visitor::remove_dead_constants() fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; if (inst->src[i].file != UNIFORM) continue; assert(this->params_remap[constant_nr] != -1); - inst->src[i].hw_reg = this->params_remap[constant_nr]; + inst->src[i].reg = this->params_remap[constant_nr]; inst->src[i].reg_offset = 0; } } @@ -912,7 +912,7 @@ fs_visitor::setup_pull_constants() if (inst->src[i].file != UNIFORM) continue; - int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; if (uniform_nr < pull_uniform_base) continue; @@ -1374,9 +1374,9 @@ fs_visitor::compute_to_mrf() /* Work out which hardware MRF registers are written by this * instruction. 
*/ - int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4; int mrf_high; - if (inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (inst->dst.reg & BRW_MRF_COMPR4) { mrf_high = mrf_low + 4; } else if (c->dispatch_width == 16 && (!inst->force_uncompressed && !inst->force_sechalf)) { @@ -1443,7 +1443,7 @@ fs_visitor::compute_to_mrf() if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { /* Found the creator of our MRF's source value. */ scan_inst->dst.file = MRF; - scan_inst->dst.hw_reg = inst->dst.hw_reg; + scan_inst->dst.reg = inst->dst.reg; scan_inst->saturate |= inst->saturate; inst->remove(); progress = true; @@ -1480,10 +1480,10 @@ fs_visitor::compute_to_mrf() /* If somebody else writes our MRF here, we can't * compute-to-MRF before that. */ - int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4; int scan_mrf_high; - if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (scan_inst->dst.reg & BRW_MRF_COMPR4) { scan_mrf_high = scan_mrf_low + 4; } else if (c->dispatch_width == 16 && (!scan_inst->force_uncompressed && @@ -1555,7 +1555,7 @@ fs_visitor::remove_duplicate_mrf_writes() if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF) { - fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; + fs_inst *prev_inst = last_mrf_move[inst->dst.reg]; if (prev_inst && inst->equals(prev_inst)) { inst->remove(); progress = true; @@ -1565,7 +1565,7 @@ fs_visitor::remove_duplicate_mrf_writes() /* Clear out the last-write records for MRFs that were overwritten. 
*/ if (inst->dst.file == MRF) { - last_mrf_move[inst->dst.hw_reg] = NULL; + last_mrf_move[inst->dst.reg] = NULL; } if (inst->mlen > 0) { @@ -1591,7 +1591,7 @@ fs_visitor::remove_duplicate_mrf_writes() inst->dst.file == MRF && inst->src[0].file == GRF && !inst->predicated) { - last_mrf_move[inst->dst.hw_reg] = inst; + last_mrf_move[inst->dst.reg] = inst; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0375f672bec..4ec649014de 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -51,7 +51,7 @@ enum register_file { MRF = BRW_MESSAGE_REGISTER_FILE, IMM = BRW_IMMEDIATE_VALUE, FIXED_HW_REG, /* a struct brw_reg */ - UNIFORM, /* prog_data->params[hw_reg] */ + UNIFORM, /* prog_data->params[reg] */ BAD_FILE }; @@ -99,7 +99,6 @@ public: void init() { memset(this, 0, sizeof(*this)); - this->hw_reg = -1; this->smear = -1; } @@ -146,8 +145,8 @@ public: this->type = fixed_hw_reg.type; } - fs_reg(enum register_file file, int hw_reg); - fs_reg(enum register_file file, int hw_reg, uint32_t type); + fs_reg(enum register_file file, int reg); + fs_reg(enum register_file file, int reg, uint32_t type); fs_reg(class fs_visitor *v, const struct glsl_type *type); bool equals(fs_reg *r) @@ -155,7 +154,6 @@ public: return (file == r->file && reg == r->reg && reg_offset == r->reg_offset && - hw_reg == r->hw_reg && type == r->type && negate == r->negate && abs == r->abs && @@ -167,12 +165,17 @@ public: /** Register file: ARF, GRF, MRF, IMM. */ enum register_file file; - /** virtual register number. 0 = fixed hw reg */ + /** + * Register number. For ARF/MRF, it's the hardware register. For + * GRF, it's a virtual register number until register allocation + */ int reg; - /** Offset within the virtual register. */ + /** + * For virtual registers, this is a hardware register offset from + * the start of the register block (for example, a constant index + * in an array access). 
+ */ int reg_offset; - /** HW register number. Generally unset until register allocation. */ - int hw_reg; /** Register type. BRW_REGISTER_TYPE_* */ int type; bool negate; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 9fb0153d1f8..e168e541bef 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -538,11 +538,9 @@ brw_reg_from_fs_reg(fs_reg *reg) case ARF: case MRF: if (reg->smear == -1) { - brw_reg = brw_vec8_reg(reg->file, - reg->hw_reg, 0); + brw_reg = brw_vec8_reg(reg->file, reg->reg, 0); } else { - brw_reg = brw_vec1_reg(reg->file, - reg->hw_reg, reg->smear); + brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear); } brw_reg = retype(brw_reg, reg->type); if (reg->sechalf) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 8e44a010576..5c9cba99ae5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -52,8 +52,8 @@ assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) { if (reg->file == GRF) { assert(reg->reg_offset >= 0); - reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; - reg->reg = 0; + reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; + reg->reg_offset = 0; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index 9ec3f502764..f1a88fcfa79 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -321,12 +321,12 @@ instruction_scheduler::calculate_deps() add_dep(last_grf_write[inst->dst.reg], n); last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; add_dep(last_mrf_write[reg], n); 
last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; @@ -401,12 +401,12 @@ instruction_scheduler::calculate_deps() if (inst->dst.file == GRF) { last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; From 09eeb0ff27005c0ffccd5cdbe46862e181a4ee6c Mon Sep 17 00:00:00 2001 From: Carl Simonson Date: Wed, 10 Aug 2011 11:10:43 -0700 Subject: [PATCH 276/600] i830: Add missing vtable entry for i830 from the hiz work. --- src/mesa/drivers/dri/i915/i830_vtbl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 6d43726beb1..ed5286fd7d9 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -881,6 +881,12 @@ i830_invalidate_state(struct intel_context *intel, GLuint new_state) i830_update_provoking_vertex(&intel->ctx); } +static bool +i830_is_hiz_depth_format(struct intel_context *intel, gl_format format) +{ + return false; +} + void i830InitVtbl(struct i830_context *i830) { @@ -898,4 +904,5 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.finish_batch = intel_finish_vb; i830->intel.vtbl.invalidate_state = i830_invalidate_state; i830->intel.vtbl.render_target_supported = i830_render_target_supported; + i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format; } From df7859be6b6b6c227e7a4e0b7fbfafcd0800f4f8 Mon Sep 17 00:00:00 2001 From: Lauri Kasanen Date: Mon, 8 Aug 2011 12:32:13 +0300 Subject: [PATCH 277/600] r600g: Add support for ROUND, v2 This is a GLSL 1.3 feature, but also used by MLAA. 
Signed-off-by: Lauri Kasanen --- src/gallium/drivers/r600/r600_asm.c | 2 ++ src/gallium/drivers/r600/r600_shader.c | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 5fae2b00c8b..24af9917a6f 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -88,6 +88,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE: return 1; default: R600_ERR( "Need instruction operand number for 0x%x.\n", alu->inst); @@ -140,6 +141,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE: return 1; default: R600_ERR( "Need instruction operand number for 0x%x.\n", alu->inst); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c55cdd707eb..2551aa26f2a 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -3243,7 +3243,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 
{TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, @@ -3401,7 +3401,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, @@ -3559,7 +3559,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, From fa351bd2e0aecccd5ed6ef8744d5ba4a6dbf5d2c Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 7 Aug 2011 17:04:04 -0700 Subject: [PATCH 278/600] intel: GetBuffer fix After copy buffer on preGEN6, it is necessary to wait for the blit to complete before returning data to the user. This should fix the piglit test: copy_buffer_coherency (pre-GEN6). 
Signed-off-by: Ben Widawsky Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_buffer_objects.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 439d6fc8247..703300b31af 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -282,12 +282,17 @@ intel_bufferobj_get_subdata(struct gl_context * ctx, GLvoid * data, struct gl_buffer_object *obj) { struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + struct intel_context *intel = intel_context(ctx); assert(intel_obj); if (intel_obj->sys_buffer) memcpy(data, (char *)intel_obj->sys_buffer + offset, size); - else + else { + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + intel_batchbuffer_flush(intel); + } drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); + } } From e411cd7b0a54d2f9b9f4cda4918aa7742ed5c2a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4nger?= Date: Wed, 10 Aug 2011 08:07:29 +0000 Subject: [PATCH 279/600] swrast: initial multi-threaded span rendering Optional parallel rendering of spans using OpenMP. Initial implementation for aa triangles. A new option for scons is also provided to activate the openmp support (off by default). 
Signed-off-by: Brian Paul --- common.py | 1 + scons/gallium.py | 12 ++++++ src/mesa/swrast/s_aatritemp.h | 72 ++++++++++++++++++++-------------- src/mesa/swrast/s_context.c | 26 +++++++++--- src/mesa/swrast/s_texcombine.c | 4 ++ src/mesa/tnl/t_pipeline.c | 12 ++++++ 6 files changed, 91 insertions(+), 36 deletions(-) diff --git a/common.py b/common.py index 8657030ea3f..cfee1b5dc2e 100644 --- a/common.py +++ b/common.py @@ -88,6 +88,7 @@ def AddOptions(opts): opts.Add('toolchain', 'compiler toolchain', default_toolchain) opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no')) opts.Add(BoolOption('llvm', 'use LLVM', default_llvm)) + opts.Add(BoolOption('openmp', 'EXPERIMENTAL: compile with openmp (swrast)', 'no')) opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes')) opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no')) opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes')) diff --git a/scons/gallium.py b/scons/gallium.py index 8cd3bc7f6e0..7135251d7a3 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -596,6 +596,18 @@ def generate(env): libs += ['m', 'pthread', 'dl'] env.Append(LIBS = libs) + # OpenMP + if env['openmp']: + if env['msvc']: + env.Append(CCFLAGS = ['/openmp']) + # When building openmp release VS2008 link.exe crashes with LNK1103 error. + # Workaround: overwrite PDB flags with empty value as it isn't required anyways + if env['build'] == 'release': + env['PDB'] = '' + if env['gcc']: + env.Append(CCFLAGS = ['-fopenmp']) + env.Append(LIBS = ['gomp']) + # Load tools env.Tool('lex') env.Tool('yacc') diff --git a/src/mesa/swrast/s_aatritemp.h b/src/mesa/swrast/s_aatritemp.h index 91d4f7a10ab..77b3ae6ec7a 100644 --- a/src/mesa/swrast/s_aatritemp.h +++ b/src/mesa/swrast/s_aatritemp.h @@ -181,13 +181,20 @@ const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; const GLfloat dxdy = majDx / majDy; const GLfloat xAdj = dxdy < 0.0F ? 
-dxdy : 0.0F; - GLfloat x = pMin[0] - (yMin - iyMin) * dxdy; GLint iy; - for (iy = iyMin; iy < iyMax; iy++, x += dxdy) { +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span) +#endif + for (iy = iyMin; iy < iyMax; iy++) { + GLfloat x = pMin[0] - (yMin - iy) * dxdy; GLint ix, startX = (GLint) (x - xAdj); GLuint count; GLfloat coverage = 0.0F; +#ifdef _OPENMP + /* each thread needs to use a different (global) SpanArrays variable */ + span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); +#endif /* skip over fragments with zero coverage */ while (startX < MAX_WIDTH) { coverage = compute_coveragef(pMin, pMid, pMax, startX, iy); @@ -228,13 +235,12 @@ coverage = compute_coveragef(pMin, pMid, pMax, ix, iy); } - if (ix <= startX) - continue; - - span.x = startX; - span.y = iy; - span.end = (GLuint) ix - (GLuint) startX; - _swrast_write_rgba_span(ctx, &span); + if (ix > startX) { + span.x = startX; + span.y = iy; + span.end = (GLuint) ix - (GLuint) startX; + _swrast_write_rgba_span(ctx, &span); + } } } else { @@ -244,13 +250,20 @@ const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; const GLfloat dxdy = majDx / majDy; const GLfloat xAdj = dxdy > 0 ? 
dxdy : 0.0F; - GLfloat x = pMin[0] - (yMin - iyMin) * dxdy; GLint iy; - for (iy = iyMin; iy < iyMax; iy++, x += dxdy) { +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span) +#endif + for (iy = iyMin; iy < iyMax; iy++) { + GLfloat x = pMin[0] - (yMin - iy) * dxdy; GLint ix, left, startX = (GLint) (x + xAdj); GLuint count, n; GLfloat coverage = 0.0F; +#ifdef _OPENMP + /* each thread needs to use a different (global) SpanArrays variable */ + span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); +#endif /* make sure we're not past the window edge */ if (startX >= ctx->DrawBuffer->_Xmax) { startX = ctx->DrawBuffer->_Xmax - 1; @@ -296,31 +309,30 @@ ATTRIB_LOOP_END #endif - if (startX <= ix) - continue; + if (startX > ix) { + n = (GLuint) startX - (GLuint) ix; - n = (GLuint) startX - (GLuint) ix; + left = ix + 1; - left = ix + 1; - - /* shift all values to the left */ - /* XXX this is temporary */ - { - SWspanarrays *array = span.array; - GLint j; - for (j = 0; j < (GLint) n; j++) { - array->coverage[j] = array->coverage[j + left]; - COPY_CHAN4(array->rgba[j], array->rgba[j + left]); + /* shift all values to the left */ + /* XXX this is temporary */ + { + SWspanarrays *array = span.array; + GLint j; + for (j = 0; j < (GLint) n; j++) { + array->coverage[j] = array->coverage[j + left]; + COPY_CHAN4(array->rgba[j], array->rgba[j + left]); #ifdef DO_Z - array->z[j] = array->z[j + left]; + array->z[j] = array->z[j + left]; #endif + } } - } - span.x = left; - span.y = iy; - span.end = n; - _swrast_write_rgba_span(ctx, &span); + span.x = left; + span.y = iy; + span.end = n; + _swrast_write_rgba_span(ctx, &span); + } } } } diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c index def1531d7ff..4434f11b990 100644 --- a/src/mesa/swrast/s_context.c +++ b/src/mesa/swrast/s_context.c @@ -772,6 +772,11 @@ _swrast_CreateContext( struct gl_context *ctx ) { GLuint i; SWcontext *swrast = (SWcontext 
*)CALLOC(sizeof(SWcontext)); +#ifdef _OPENMP + const GLint maxThreads = omp_get_max_threads(); +#else + const GLint maxThreads = 1; +#endif if (SWRAST_DEBUG) { _mesa_debug(ctx, "_swrast_CreateContext\n"); @@ -806,19 +811,25 @@ _swrast_CreateContext( struct gl_context *ctx ) for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) swrast->TextureSample[i] = NULL; - swrast->SpanArrays = MALLOC_STRUCT(sw_span_arrays); + /* SpanArrays is global and shared by all SWspan instances. However, when + * using multiple threads, it is necessary to have one SpanArrays instance + * per thread. + */ + swrast->SpanArrays = (SWspanarrays *) MALLOC(maxThreads * sizeof(SWspanarrays)); if (!swrast->SpanArrays) { FREE(swrast); return GL_FALSE; } - swrast->SpanArrays->ChanType = CHAN_TYPE; + for(i = 0; i < maxThreads; i++) { + swrast->SpanArrays[i].ChanType = CHAN_TYPE; #if CHAN_TYPE == GL_UNSIGNED_BYTE - swrast->SpanArrays->rgba = swrast->SpanArrays->rgba8; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba8; #elif CHAN_TYPE == GL_UNSIGNED_SHORT - swrast->SpanArrays->rgba = swrast->SpanArrays->rgba16; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba16; #else - swrast->SpanArrays->rgba = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0]; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].attribs[FRAG_ATTRIB_COL0]; #endif + } /* init point span buffer */ swrast->PointSpan.primitive = GL_POINT; @@ -826,7 +837,10 @@ _swrast_CreateContext( struct gl_context *ctx ) swrast->PointSpan.facing = 0; swrast->PointSpan.array = swrast->SpanArrays; - swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * + /* TexelBuffer is also global and normally shared by all SWspan instances; + * when running with multiple threads, create one per thread. 
+ */ + swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads * MAX_WIDTH * 4 * sizeof(GLfloat)); if (!swrast->TexelBuffer) { FREE(swrast->SpanArrays); diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c index 086ed0b33d7..80b9dff3cc2 100644 --- a/src/mesa/swrast/s_texcombine.c +++ b/src/mesa/swrast/s_texcombine.c @@ -48,7 +48,11 @@ typedef float (*float4_array)[4]; static INLINE float4_array get_texel_array(SWcontext *swrast, GLuint unit) { +#ifdef _OPENMP + return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num())); +#else return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4); +#endif } diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c index 18f095f0d4b..881d5d5f535 100644 --- a/src/mesa/tnl/t_pipeline.c +++ b/src/mesa/tnl/t_pipeline.c @@ -146,7 +146,17 @@ void _tnl_run_pipeline( struct gl_context *ctx ) _tnl_notify_pipeline_output_change( ctx ); } +#ifndef _OPENMP + /* Don't adjust FPU precision mode in case multiple threads are to be used. + * This would require that the additional threads also changed the FPU mode + * which is quite a mess as this had to be done in all parallelized sections; + * otherwise the master thread and all other threads are running in different + * modes, producing inconsistent results. 
+ * Note that all x64 implementations don't define/use START_FAST_MATH, so + * this is "hack" is only used in i386 mode + */ START_FAST_MATH(__tmp); +#endif for (i = 0; i < tnl->pipeline.nr_stages ; i++) { struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i]; @@ -154,7 +164,9 @@ void _tnl_run_pipeline( struct gl_context *ctx ) break; } +#ifndef _OPENMP END_FAST_MATH(__tmp); +#endif } From 37a64baea87c470a68f9b2582af86783eb3509c4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 11 Aug 2011 08:52:41 -0600 Subject: [PATCH 280/600] swrast: don't try to do depth testing if there's no depth buffer Fixes piglit hiz-depth-stencil-test-fbo-d0-s8 crash. See http://bugs.freedesktop.org/show_bug.cgi?id=37907 NOTE: This is a candidate for the 7.11 branch. --- src/mesa/swrast/s_stencil.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c index 5bec71c057b..fa5093a3407 100644 --- a/src/mesa/swrast/s_stencil.c +++ b/src/mesa/swrast/s_stencil.c @@ -462,7 +462,8 @@ stencil_and_ztest_span(struct gl_context *ctx, SWspan *span, GLuint face) * Some fragments passed the stencil test, apply depth test to them * and apply Zpass and Zfail stencil ops. */ - if (ctx->Depth.Test == GL_FALSE) { + if (ctx->Depth.Test == GL_FALSE || + ctx->DrawBuffer->_DepthBuffer == NULL) { /* * No depth buffer, just apply zpass stencil function to active pixels. */ From 9b8287f8f5398647ced3a52885233d58e548c2b7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 11 Aug 2011 08:58:08 -0600 Subject: [PATCH 281/600] mesa: fix ColorMask array index in _mesa_init_driver_state() This doesn't really make any difference because all the colormasks are the same upon context set-up, but it makes more sense. 
--- src/mesa/drivers/common/driverfuncs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 76630264bf7..70f8727a092 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -248,10 +248,10 @@ _mesa_init_driver_state(struct gl_context *ctx) GLuint i; for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { ctx->Driver.ColorMaskIndexed(ctx, i, - ctx->Color.ColorMask[0][RCOMP], - ctx->Color.ColorMask[0][GCOMP], - ctx->Color.ColorMask[0][BCOMP], - ctx->Color.ColorMask[0][ACOMP]); + ctx->Color.ColorMask[i][RCOMP], + ctx->Color.ColorMask[i][GCOMP], + ctx->Color.ColorMask[i][BCOMP], + ctx->Color.ColorMask[i][ACOMP]); } } else { From 099aad2fb0dba8baff61dc7a6803c6c976c08069 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 11 Aug 2011 09:02:16 -0600 Subject: [PATCH 282/600] mesa: fix initialization of GL_FOG_MODE in _mesa_init_driver_state() --- src/mesa/drivers/common/driverfuncs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 70f8727a092..a6174ee2f56 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -286,7 +286,10 @@ _mesa_init_driver_state(struct gl_context *ctx) ctx->Driver.Enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE); ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - ctx->Driver.Fogfv(ctx, GL_FOG_MODE, 0); + { + GLfloat mode = (GLfloat) ctx->Fog.Mode; + ctx->Driver.Fogfv(ctx, GL_FOG_MODE, &mode); + } ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); From 9cd64ec35acd54cbe0be4d03236d2c5a9d4be6fe Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 10 Aug 2011 15:46:14 -0700 Subject: [PATCH 283/600] x86-64: Fix compile error with clang Remove the 'f' 
suffix from a float literal. - .float 0.0f+1.0 + .float 1.0 This fixes the following compile error with clang: error: unexpected token in directive .float 0.0f+1.0 ^ Note: This is a candidate for the stable branches. Reviewed-by: Ben Widawsky Signed-off-by: Chad Versace --- src/mesa/x86-64/xform4.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S index 6141e434679..5abd5a25de5 100644 --- a/src/mesa/x86-64/xform4.S +++ b/src/mesa/x86-64/xform4.S @@ -118,7 +118,7 @@ p4_constants: .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 -.float 0f+1.0 +.float 1.0 .text .align 16 From 3ce243879a72363ca3283fe6ab30381e05a9b4a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 12 Aug 2011 11:10:56 +0200 Subject: [PATCH 284/600] gallium/gbm: Add dependencies for libraries linked into pipe_*.so. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michel Dänzer --- src/gallium/targets/gbm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile index b38782c4070..faacc89f1a0 100644 --- a/src/gallium/targets/gbm/Makefile +++ b/src/gallium/targets/gbm/Makefile @@ -146,7 +146,7 @@ $(GBM_EXTRA_TARGETS): $(TOP)/$(LIB_DIR)/gbm/%: % @$(INSTALL) -d $(dir $@) $(INSTALL) $< $(dir $@) -$(pipe_TARGETS): $(PIPE_PREFIX)%.so: pipe_%.o +$(pipe_TARGETS): $(PIPE_PREFIX)%.so: pipe_%.o $(pipe_LIBS) $($*_LIBS) $(MKLIB) -o $@ -noprefix -linker '$(CC)' \ -ldflags '-L$(TOP)/$(LIB_DIR) $(pipe_LDFLAGS) $(LDFLAGS)' \ $(MKLIB_OPTIONS) $< \ From 281947b3511f606df365e0985631f31d237e63f1 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sun, 7 Aug 2011 16:58:29 +0900 Subject: [PATCH 285/600] glapi: add gles_api.py Move the list of entry points belong to GLES from mapi_abi.py to a new file. 
Until we figure out how to describe the APIs an entry point belongs to in the XML file, and how to handle the case where an entry point others alias is missing in some APIs, this is an easier solution than maintaining another two sets of XMLs in glapi/gen-es/. Reviewed-by: Brian Paul --- src/mapi/glapi/gen/gl_and_es_API.xml | 5 + src/mapi/glapi/gen/gles_api.py | 452 +++++++++++++++++++++++++++ src/mapi/mapi/mapi_abi.py | 424 +------------------------ 3 files changed, 464 insertions(+), 417 deletions(-) create mode 100644 src/mapi/glapi/gen/gles_api.py diff --git a/src/mapi/glapi/gen/gl_and_es_API.xml b/src/mapi/glapi/gen/gl_and_es_API.xml index ac7d43ceda7..1313da0f5d6 100644 --- a/src/mapi/glapi/gen/gl_and_es_API.xml +++ b/src/mapi/glapi/gen/gl_and_es_API.xml @@ -3,6 +3,11 @@ + + diff --git a/src/mapi/glapi/gen/gles_api.py b/src/mapi/glapi/gen/gles_api.py new file mode 100644 index 00000000000..4cde9e544d5 --- /dev/null +++ b/src/mapi/glapi/gen/gles_api.py @@ -0,0 +1,452 @@ +#!/usr/bin/env python + +# Mesa 3-D graphics library +# Version: 7.12 +# +# Copyright (C) 2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +# Authors: +# Chia-I Wu + +# These info should be part of GLAPI XML. Until that is possible, scripts have +# to use tables here to filter gl_api. + +es1_core = ( + # OpenGL ES 1.1 + 'ActiveTexture', + 'AlphaFunc', + 'AlphaFuncx', + 'BindBuffer', + 'BindTexture', + 'BlendFunc', + 'BufferData', + 'BufferSubData', + 'Clear', + 'ClearColor', + 'ClearColorx', + 'ClearDepthf', + 'ClearDepthx', + 'ClearStencil', + 'ClientActiveTexture', + 'ClipPlanef', + 'ClipPlanex', + 'Color4f', + 'Color4ub', + 'Color4x', + 'ColorMask', + 'ColorPointer', + 'CompressedTexImage2D', + 'CompressedTexSubImage2D', + 'CopyTexImage2D', + 'CopyTexSubImage2D', + 'CullFace', + 'DeleteBuffers', + 'DeleteTextures', + 'DepthFunc', + 'DepthMask', + 'DepthRangef', + 'DepthRangex', + 'Disable', + 'DisableClientState', + 'DrawArrays', + 'DrawElements', + 'Enable', + 'EnableClientState', + 'Finish', + 'Flush', + 'Fogf', + 'Fogfv', + 'Fogx', + 'Fogxv', + 'FrontFace', + 'Frustumf', + 'Frustumx', + 'GenBuffers', + 'GenTextures', + 'GetBooleanv', + 'GetBufferParameteriv', + 'GetClipPlanef', + 'GetClipPlanex', + 'GetError', + 'GetFixedv', + 'GetFloatv', + 'GetIntegerv', + 'GetLightfv', + 'GetLightxv', + 'GetMaterialfv', + 'GetMaterialxv', + 'GetPointerv', + 'GetString', + 'GetTexEnvfv', + 'GetTexEnviv', + 'GetTexEnvxv', + 'GetTexParameterfv', + 'GetTexParameteriv', + 'GetTexParameterxv', + 'Hint', + 'IsBuffer', + 'IsEnabled', + 'IsTexture', + 'Lightf', + 'Lightfv', + 'LightModelf', + 'LightModelfv', + 'LightModelx', + 'LightModelxv', + 'Lightx', + 'Lightxv', + 'LineWidth', + 'LineWidthx', + 'LoadIdentity', + 'LoadMatrixf', + 'LoadMatrixx', + 'LogicOp', + 'Materialf', + 'Materialfv', + 'Materialx', + 'Materialxv', + 'MatrixMode', + 
'MultiTexCoord4f', + 'MultiTexCoord4x', + 'MultMatrixf', + 'MultMatrixx', + 'Normal3f', + 'Normal3x', + 'NormalPointer', + 'Orthof', + 'Orthox', + 'PixelStorei', + 'PointParameterf', + 'PointParameterfv', + 'PointParameterx', + 'PointParameterxv', + 'PointSize', + 'PointSizex', + 'PolygonOffset', + 'PolygonOffsetx', + 'PopMatrix', + 'PushMatrix', + 'ReadPixels', + 'Rotatef', + 'Rotatex', + 'SampleCoverage', + 'SampleCoveragex', + 'Scalef', + 'Scalex', + 'Scissor', + 'ShadeModel', + 'StencilFunc', + 'StencilMask', + 'StencilOp', + 'TexCoordPointer', + 'TexEnvf', + 'TexEnvfv', + 'TexEnvi', + 'TexEnviv', + 'TexEnvx', + 'TexEnvxv', + 'TexImage2D', + 'TexParameterf', + 'TexParameterfv', + 'TexParameteri', + 'TexParameteriv', + 'TexParameterx', + 'TexParameterxv', + 'TexSubImage2D', + 'Translatef', + 'Translatex', + 'VertexPointer', + 'Viewport', +) + +es1_api = es1_core + ( + # GL_OES_EGL_image + 'EGLImageTargetTexture2DOES', + 'EGLImageTargetRenderbufferStorageOES', + # GL_OES_mapbuffer + 'GetBufferPointervOES', + 'MapBufferOES', + 'UnmapBufferOES', + # GL_EXT_multi_draw_arrays + 'MultiDrawArraysEXT', + 'MultiDrawElementsEXT', + # GL_OES_blend_equation_separate + 'BlendEquationSeparateOES', + # GL_OES_blend_func_separate + 'BlendFuncSeparateOES', + # GL_OES_blend_subtract + 'BlendEquationOES', + # GL_OES_draw_texture + 'DrawTexiOES', + 'DrawTexivOES', + 'DrawTexfOES', + 'DrawTexfvOES', + 'DrawTexsOES', + 'DrawTexsvOES', + 'DrawTexxOES', + 'DrawTexxvOES', + # GL_OES_fixed_point + 'AlphaFuncxOES', + 'ClearColorxOES', + 'ClearDepthxOES', + 'Color4xOES', + 'DepthRangexOES', + 'FogxOES', + 'FogxvOES', + 'FrustumxOES', + 'LightModelxOES', + 'LightModelxvOES', + 'LightxOES', + 'LightxvOES', + 'LineWidthxOES', + 'LoadMatrixxOES', + 'MaterialxOES', + 'MaterialxvOES', + 'MultiTexCoord4xOES', + 'MultMatrixxOES', + 'Normal3xOES', + 'OrthoxOES', + 'PointSizexOES', + 'PolygonOffsetxOES', + 'RotatexOES', + 'SampleCoveragexOES', + 'ScalexOES', + 'TexEnvxOES', + 'TexEnvxvOES', + 
'TexParameterxOES', + 'TranslatexOES', + 'ClipPlanexOES', + 'GetClipPlanexOES', + 'GetFixedvOES', + 'GetLightxvOES', + 'GetMaterialxvOES', + 'GetTexEnvxvOES', + 'GetTexParameterxvOES', + 'PointParameterxOES', + 'PointParameterxvOES', + 'TexParameterxvOES', + # GL_OES_framebuffer_object + 'BindFramebufferOES', + 'BindRenderbufferOES', + 'CheckFramebufferStatusOES', + 'DeleteFramebuffersOES', + 'DeleteRenderbuffersOES', + 'FramebufferRenderbufferOES', + 'FramebufferTexture2DOES', + 'GenerateMipmapOES', + 'GenFramebuffersOES', + 'GenRenderbuffersOES', + 'GetFramebufferAttachmentParameterivOES', + 'GetRenderbufferParameterivOES', + 'IsFramebufferOES', + 'IsRenderbufferOES', + 'RenderbufferStorageOES', + # GL_OES_point_size_array + 'PointSizePointerOES', + # GL_OES_query_matrix + 'QueryMatrixxOES', + # GL_OES_single_precision + 'ClearDepthfOES', + 'DepthRangefOES', + 'FrustumfOES', + 'OrthofOES', + 'ClipPlanefOES', + 'GetClipPlanefOES', + # GL_OES_texture_cube_map + 'GetTexGenfvOES', + 'GetTexGenivOES', + 'GetTexGenxvOES', + 'TexGenfOES', + 'TexGenfvOES', + 'TexGeniOES', + 'TexGenivOES', + 'TexGenxOES', + 'TexGenxvOES', +) + +es2_core = ( + # OpenGL ES 2.0 + "ActiveTexture", + "AttachShader", + "BindAttribLocation", + "BindBuffer", + "BindFramebuffer", + "BindRenderbuffer", + "BindTexture", + "BlendColor", + "BlendEquation", + "BlendEquationSeparate", + "BlendFunc", + "BlendFuncSeparate", + "BufferData", + "BufferSubData", + "CheckFramebufferStatus", + "Clear", + "ClearColor", + "ClearDepthf", + "ClearStencil", + "ColorMask", + "CompileShader", + "CompressedTexImage2D", + "CompressedTexSubImage2D", + "CopyTexImage2D", + "CopyTexSubImage2D", + "CreateProgram", + "CreateShader", + "CullFace", + "DeleteBuffers", + "DeleteFramebuffers", + "DeleteProgram", + "DeleteRenderbuffers", + "DeleteShader", + "DeleteTextures", + "DepthFunc", + "DepthMask", + "DepthRangef", + "DetachShader", + "Disable", + "DisableVertexAttribArray", + "DrawArrays", + "DrawElements", + "Enable", + 
"EnableVertexAttribArray", + "Finish", + "Flush", + "FramebufferRenderbuffer", + "FramebufferTexture2D", + "FrontFace", + "GenBuffers", + "GenerateMipmap", + "GenFramebuffers", + "GenRenderbuffers", + "GenTextures", + "GetActiveAttrib", + "GetActiveUniform", + "GetAttachedShaders", + "GetAttribLocation", + "GetBooleanv", + "GetBufferParameteriv", + "GetError", + "GetFloatv", + "GetFramebufferAttachmentParameteriv", + "GetIntegerv", + "GetProgramInfoLog", + "GetProgramiv", + "GetRenderbufferParameteriv", + "GetShaderInfoLog", + "GetShaderiv", + "GetShaderPrecisionFormat", + "GetShaderSource", + "GetString", + "GetTexParameterfv", + "GetTexParameteriv", + "GetUniformfv", + "GetUniformiv", + "GetUniformLocation", + "GetVertexAttribfv", + "GetVertexAttribiv", + "GetVertexAttribPointerv", + "Hint", + "IsBuffer", + "IsEnabled", + "IsFramebuffer", + "IsProgram", + "IsRenderbuffer", + "IsShader", + "IsTexture", + "LineWidth", + "LinkProgram", + "PixelStorei", + "PolygonOffset", + "ReadPixels", + "ReleaseShaderCompiler", + "RenderbufferStorage", + "SampleCoverage", + "Scissor", + "ShaderBinary", + "ShaderSource", + "StencilFunc", + "StencilFuncSeparate", + "StencilMask", + "StencilMaskSeparate", + "StencilOp", + "StencilOpSeparate", + "TexImage2D", + "TexParameterf", + "TexParameterfv", + "TexParameteri", + "TexParameteriv", + "TexSubImage2D", + "Uniform1f", + "Uniform1fv", + "Uniform1i", + "Uniform1iv", + "Uniform2f", + "Uniform2fv", + "Uniform2i", + "Uniform2iv", + "Uniform3f", + "Uniform3fv", + "Uniform3i", + "Uniform3iv", + "Uniform4f", + "Uniform4fv", + "Uniform4i", + "Uniform4iv", + "UniformMatrix2fv", + "UniformMatrix3fv", + "UniformMatrix4fv", + "UseProgram", + "ValidateProgram", + "VertexAttrib1f", + "VertexAttrib1fv", + "VertexAttrib2f", + "VertexAttrib2fv", + "VertexAttrib3f", + "VertexAttrib3fv", + "VertexAttrib4f", + "VertexAttrib4fv", + "VertexAttribPointer", + "Viewport", +) + +es2_api = es2_core + ( + # GL_OES_EGL_image + 'EGLImageTargetTexture2DOES', + 
'EGLImageTargetRenderbufferStorageOES', + # GL_OES_mapbuffer + 'GetBufferPointervOES', + 'MapBufferOES', + 'UnmapBufferOES', + # GL_EXT_multi_draw_arrays + 'MultiDrawArraysEXT', + 'MultiDrawElementsEXT', + # GL_OES_texture_3D + 'CompressedTexImage3DOES', + 'CompressedTexSubImage3DOES', + 'CopyTexSubImage3DOES', + 'FramebufferTexture3DOES', + 'TexImage3DOES', + 'TexSubImage3DOES', + # GL_OES_get_program_binary + 'GetProgramBinaryOES', + 'ProgramBinaryOES', +) diff --git a/src/mapi/mapi/mapi_abi.py b/src/mapi/mapi/mapi_abi.py index cb9fc0ef841..e3d3f6518ec 100644 --- a/src/mapi/mapi/mapi_abi.py +++ b/src/mapi/mapi/mapi_abi.py @@ -27,6 +27,11 @@ # Chia-I Wu import sys +# make it possible to import glapi +import os +GLAPI = "./%s/../glapi/gen" % (os.path.dirname(sys.argv[0])) +sys.path.append(GLAPI) + import re from optparse import OptionParser @@ -128,9 +133,6 @@ class ABIEntry(object): def abi_parse_xml(xml): """Parse a GLAPI XML file for ABI entries.""" - import os - GLAPI = "./%s/../glapi/gen" % (os.path.dirname(sys.argv[0])) - sys.path.append(GLAPI) import gl_XML, glX_XML api = gl_XML.parse_GL_API(xml, glX_XML.glx_item_factory()) @@ -749,255 +751,7 @@ class ES1APIPrinter(GLAPIPrinter): """OpenGL ES 1.x API Printer""" def __init__(self, entries): - es1_api = [ - # OpenGL ES 1.1 - 'ActiveTexture', - 'AlphaFunc', - 'AlphaFuncx', - 'BindBuffer', - 'BindTexture', - 'BlendFunc', - 'BufferData', - 'BufferSubData', - 'Clear', - 'ClearColor', - 'ClearColorx', - 'ClearDepthf', - 'ClearDepthx', - 'ClearStencil', - 'ClientActiveTexture', - 'ClipPlanef', - 'ClipPlanex', - 'Color4f', - 'Color4ub', - 'Color4x', - 'ColorMask', - 'ColorPointer', - 'CompressedTexImage2D', - 'CompressedTexSubImage2D', - 'CopyTexImage2D', - 'CopyTexSubImage2D', - 'CullFace', - 'DeleteBuffers', - 'DeleteTextures', - 'DepthFunc', - 'DepthMask', - 'DepthRangef', - 'DepthRangex', - 'Disable', - 'DisableClientState', - 'DrawArrays', - 'DrawElements', - 'Enable', - 'EnableClientState', - 'Finish', - 
'Flush', - 'Fogf', - 'Fogfv', - 'Fogx', - 'Fogxv', - 'FrontFace', - 'Frustumf', - 'Frustumx', - 'GenBuffers', - 'GenTextures', - 'GetBooleanv', - 'GetBufferParameteriv', - 'GetClipPlanef', - 'GetClipPlanex', - 'GetError', - 'GetFixedv', - 'GetFloatv', - 'GetIntegerv', - 'GetLightfv', - 'GetLightxv', - 'GetMaterialfv', - 'GetMaterialxv', - 'GetPointerv', - 'GetString', - 'GetTexEnvfv', - 'GetTexEnviv', - 'GetTexEnvxv', - 'GetTexParameterfv', - 'GetTexParameteriv', - 'GetTexParameterxv', - 'Hint', - 'IsBuffer', - 'IsEnabled', - 'IsTexture', - 'Lightf', - 'Lightfv', - 'LightModelf', - 'LightModelfv', - 'LightModelx', - 'LightModelxv', - 'Lightx', - 'Lightxv', - 'LineWidth', - 'LineWidthx', - 'LoadIdentity', - 'LoadMatrixf', - 'LoadMatrixx', - 'LogicOp', - 'Materialf', - 'Materialfv', - 'Materialx', - 'Materialxv', - 'MatrixMode', - 'MultiTexCoord4f', - 'MultiTexCoord4x', - 'MultMatrixf', - 'MultMatrixx', - 'Normal3f', - 'Normal3x', - 'NormalPointer', - 'Orthof', - 'Orthox', - 'PixelStorei', - 'PointParameterf', - 'PointParameterfv', - 'PointParameterx', - 'PointParameterxv', - 'PointSize', - 'PointSizex', - 'PolygonOffset', - 'PolygonOffsetx', - 'PopMatrix', - 'PushMatrix', - 'ReadPixels', - 'Rotatef', - 'Rotatex', - 'SampleCoverage', - 'SampleCoveragex', - 'Scalef', - 'Scalex', - 'Scissor', - 'ShadeModel', - 'StencilFunc', - 'StencilMask', - 'StencilOp', - 'TexCoordPointer', - 'TexEnvf', - 'TexEnvfv', - 'TexEnvi', - 'TexEnviv', - 'TexEnvx', - 'TexEnvxv', - 'TexImage2D', - 'TexParameterf', - 'TexParameterfv', - 'TexParameteri', - 'TexParameteriv', - 'TexParameterx', - 'TexParameterxv', - 'TexSubImage2D', - 'Translatef', - 'Translatex', - 'VertexPointer', - 'Viewport', - # GL_OES_EGL_image - 'EGLImageTargetTexture2DOES', - 'EGLImageTargetRenderbufferStorageOES', - # GL_OES_mapbuffer - 'GetBufferPointervOES', - 'MapBufferOES', - 'UnmapBufferOES', - # GL_EXT_multi_draw_arrays - 'MultiDrawArraysEXT', - 'MultiDrawElementsEXT', - # GL_OES_blend_equation_separate - 
'BlendEquationSeparateOES', - # GL_OES_blend_func_separate - 'BlendFuncSeparateOES', - # GL_OES_blend_subtract - 'BlendEquationOES', - # GL_OES_draw_texture - 'DrawTexiOES', - 'DrawTexivOES', - 'DrawTexfOES', - 'DrawTexfvOES', - 'DrawTexsOES', - 'DrawTexsvOES', - 'DrawTexxOES', - 'DrawTexxvOES', - # GL_OES_fixed_point - 'AlphaFuncxOES', - 'ClearColorxOES', - 'ClearDepthxOES', - 'Color4xOES', - 'DepthRangexOES', - 'FogxOES', - 'FogxvOES', - 'FrustumxOES', - 'LightModelxOES', - 'LightModelxvOES', - 'LightxOES', - 'LightxvOES', - 'LineWidthxOES', - 'LoadMatrixxOES', - 'MaterialxOES', - 'MaterialxvOES', - 'MultiTexCoord4xOES', - 'MultMatrixxOES', - 'Normal3xOES', - 'OrthoxOES', - 'PointSizexOES', - 'PolygonOffsetxOES', - 'RotatexOES', - 'SampleCoveragexOES', - 'ScalexOES', - 'TexEnvxOES', - 'TexEnvxvOES', - 'TexParameterxOES', - 'TranslatexOES', - 'ClipPlanexOES', - 'GetClipPlanexOES', - 'GetFixedvOES', - 'GetLightxvOES', - 'GetMaterialxvOES', - 'GetTexEnvxvOES', - 'GetTexParameterxvOES', - 'PointParameterxOES', - 'PointParameterxvOES', - 'TexParameterxvOES', - # GL_OES_framebuffer_object - 'BindFramebufferOES', - 'BindRenderbufferOES', - 'CheckFramebufferStatusOES', - 'DeleteFramebuffersOES', - 'DeleteRenderbuffersOES', - 'FramebufferRenderbufferOES', - 'FramebufferTexture2DOES', - 'GenerateMipmapOES', - 'GenFramebuffersOES', - 'GenRenderbuffersOES', - 'GetFramebufferAttachmentParameterivOES', - 'GetRenderbufferParameterivOES', - 'IsFramebufferOES', - 'IsRenderbufferOES', - 'RenderbufferStorageOES', - # GL_OES_point_size_array - 'PointSizePointerOES', - # GL_OES_query_matrix - 'QueryMatrixxOES', - # GL_OES_single_precision - 'ClearDepthfOES', - 'DepthRangefOES', - 'FrustumfOES', - 'OrthofOES', - 'ClipPlanefOES', - 'GetClipPlanefOES', - # GL_OES_texture_cube_map - 'GetTexGenfvOES', - 'GetTexGenivOES', - 'GetTexGenxvOES', - 'TexGenfOES', - 'TexGenfvOES', - 'TexGeniOES', - 'TexGenivOES', - 'TexGenxOES', - 'TexGenxvOES', - ] + from gles_api import es1_api 
super(ES1APIPrinter, self).__init__(entries, es1_api) self.prefix_lib = 'gl' @@ -1016,171 +770,7 @@ class ES2APIPrinter(GLAPIPrinter): """OpenGL ES 2.x API Printer""" def __init__(self, entries): - es2_api = [ - # OpenGL ES 2.0 - "ActiveTexture", - "AttachShader", - "BindAttribLocation", - "BindBuffer", - "BindFramebuffer", - "BindRenderbuffer", - "BindTexture", - "BlendColor", - "BlendEquation", - "BlendEquationSeparate", - "BlendFunc", - "BlendFuncSeparate", - "BufferData", - "BufferSubData", - "CheckFramebufferStatus", - "Clear", - "ClearColor", - "ClearDepthf", - "ClearStencil", - "ColorMask", - "CompileShader", - "CompressedTexImage2D", - "CompressedTexSubImage2D", - "CopyTexImage2D", - "CopyTexSubImage2D", - "CreateProgram", - "CreateShader", - "CullFace", - "DeleteBuffers", - "DeleteFramebuffers", - "DeleteProgram", - "DeleteRenderbuffers", - "DeleteShader", - "DeleteTextures", - "DepthFunc", - "DepthMask", - "DepthRangef", - "DetachShader", - "Disable", - "DisableVertexAttribArray", - "DrawArrays", - "DrawElements", - "Enable", - "EnableVertexAttribArray", - "Finish", - "Flush", - "FramebufferRenderbuffer", - "FramebufferTexture2D", - "FrontFace", - "GenBuffers", - "GenerateMipmap", - "GenFramebuffers", - "GenRenderbuffers", - "GenTextures", - "GetActiveAttrib", - "GetActiveUniform", - "GetAttachedShaders", - "GetAttribLocation", - "GetBooleanv", - "GetBufferParameteriv", - "GetError", - "GetFloatv", - "GetFramebufferAttachmentParameteriv", - "GetIntegerv", - "GetProgramInfoLog", - "GetProgramiv", - "GetRenderbufferParameteriv", - "GetShaderInfoLog", - "GetShaderiv", - "GetShaderPrecisionFormat", - "GetShaderSource", - "GetString", - "GetTexParameterfv", - "GetTexParameteriv", - "GetUniformfv", - "GetUniformiv", - "GetUniformLocation", - "GetVertexAttribfv", - "GetVertexAttribiv", - "GetVertexAttribPointerv", - "Hint", - "IsBuffer", - "IsEnabled", - "IsFramebuffer", - "IsProgram", - "IsRenderbuffer", - "IsShader", - "IsTexture", - "LineWidth", - 
"LinkProgram", - "PixelStorei", - "PolygonOffset", - "ReadPixels", - "ReleaseShaderCompiler", - "RenderbufferStorage", - "SampleCoverage", - "Scissor", - "ShaderBinary", - "ShaderSource", - "StencilFunc", - "StencilFuncSeparate", - "StencilMask", - "StencilMaskSeparate", - "StencilOp", - "StencilOpSeparate", - "TexImage2D", - "TexParameterf", - "TexParameterfv", - "TexParameteri", - "TexParameteriv", - "TexSubImage2D", - "Uniform1f", - "Uniform1fv", - "Uniform1i", - "Uniform1iv", - "Uniform2f", - "Uniform2fv", - "Uniform2i", - "Uniform2iv", - "Uniform3f", - "Uniform3fv", - "Uniform3i", - "Uniform3iv", - "Uniform4f", - "Uniform4fv", - "Uniform4i", - "Uniform4iv", - "UniformMatrix2fv", - "UniformMatrix3fv", - "UniformMatrix4fv", - "UseProgram", - "ValidateProgram", - "VertexAttrib1f", - "VertexAttrib1fv", - "VertexAttrib2f", - "VertexAttrib2fv", - "VertexAttrib3f", - "VertexAttrib3fv", - "VertexAttrib4f", - "VertexAttrib4fv", - "VertexAttribPointer", - "Viewport", - # GL_OES_EGL_image - 'EGLImageTargetTexture2DOES', - 'EGLImageTargetRenderbufferStorageOES', - # GL_OES_mapbuffer - 'GetBufferPointervOES', - 'MapBufferOES', - 'UnmapBufferOES', - # GL_EXT_multi_draw_arrays - 'MultiDrawArraysEXT', - 'MultiDrawElementsEXT', - # GL_OES_texture_3D - 'CompressedTexImage3DOES', - 'CompressedTexSubImage3DOES', - 'CopyTexSubImage3DOES', - 'FramebufferTexture3DOES', - 'TexImage3DOES', - 'TexSubImage3DOES', - # GL_OES_get_program_binary - 'GetProgramBinaryOES', - 'ProgramBinaryOES', - ] + from gles_api import es2_api super(ES2APIPrinter, self).__init__(entries, es2_api) self.prefix_lib = 'gl' From b8202b3d44b18a3db281c64d1ca01e851ae6deb1 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sun, 7 Aug 2011 23:19:51 +0900 Subject: [PATCH 286/600] glapi: add methods to filter functions add gl_api::filter_functions and gl_function::filter_entry_points to filter out unwanted functions and entry points. 
Reviewed-by: Brian Paul --- src/mapi/glapi/gen/gl_XML.py | 46 ++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py index 4d414e8b0f8..4dc2e8fa7fb 100644 --- a/src/mapi/glapi/gen/gl_XML.py +++ b/src/mapi/glapi/gen/gl_XML.py @@ -618,7 +618,7 @@ class gl_function( gl_item ): # for each entry-point. Otherwise, they may generate code # that won't compile. - self.parameter_strings = {} + self.entry_point_parameters = {} self.process_element( element ) @@ -703,12 +703,34 @@ class gl_function( gl_item ): if element.children: self.initialized = 1 - self.parameter_strings[name] = create_parameter_string(parameters, 1) + self.entry_point_parameters[name] = parameters else: - self.parameter_strings[name] = None + self.entry_point_parameters[name] = [] return + def filter_entry_points(self, entry_point_list): + """Filter out entry points not in entry_point_list.""" + if not self.initialized: + raise RuntimeError('%s is not initialized yet' % self.name) + + entry_points = [] + for ent in self.entry_points: + if ent not in entry_point_list: + if ent in self.static_entry_points: + self.static_entry_points.remove(ent) + self.entry_point_parameters.pop(ent) + else: + entry_points.append(ent) + + if not entry_points: + raise RuntimeError('%s has no entry point after filtering' % self.name) + + self.entry_points = entry_points + if self.name not in entry_points: + # use the first remaining entry point + self.name = entry_points[0] + self.parameters = self.entry_point_parameters[entry_points[0]] def get_images(self): """Return potentially empty list of input images.""" @@ -721,11 +743,11 @@ class gl_function( gl_item ): def get_parameter_string(self, entrypoint = None): if entrypoint: - s = self.parameter_strings[ entrypoint ] - if s: - return s + params = self.entry_point_parameters[ entrypoint ] + else: + params = self.parameters - return create_parameter_string( self.parameters, 
1 ) + return create_parameter_string( params, 1 ) def get_called_parameter_string(self): p_string = "" @@ -791,6 +813,16 @@ class gl_api: typeexpr.create_initial_types() return + def filter_functions(self, entry_point_list): + """Filter out entry points not in entry_point_list.""" + functions_by_name = {} + for func in self.functions_by_name.itervalues(): + entry_points = [ent for ent in func.entry_points if ent in entry_point_list] + if entry_points: + func.filter_entry_points(entry_points) + functions_by_name[func.name] = func + + self.functions_by_name = functions_by_name def process_element(self, doc): element = doc.children From 5076561b35b9c2c78f277ab03bf1e642094ee20e Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 8 Aug 2011 10:14:44 +0900 Subject: [PATCH 287/600] glapi: use gl_and_es_API.xml to generate GLES headers glapi/gen-es/ defines two sets of GLAPI XMLs for OpenGL ES 1.1 (es1_API.xml) and 2.0 (es2_API.xml) respectively. They are used to generate dispatch.h and remap_helper.h for GLES. Together with gl_and_es_API.xml, we have to maintain three sets of GLAPI XMLs. This commit makes dispatch.h and remap_helper.h for GLES be generated from gl_and_es_API.xml. 
Reviewed-by: Brian Paul --- src/mapi/glapi/gen-es/Makefile | 10 ++++++---- src/mapi/glapi/gen/Makefile | 6 ++---- src/mapi/glapi/gen/gl_table.py | 20 +++++++++++++++----- src/mapi/glapi/gen/remap_helper.py | 18 ++++++++++++++++-- src/mesa/SConscript | 16 ++++++++-------- 5 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/mapi/glapi/gen-es/Makefile b/src/mapi/glapi/gen-es/Makefile index bf66ec037cf..3fd539d26d5 100644 --- a/src/mapi/glapi/gen-es/Makefile +++ b/src/mapi/glapi/gen-es/Makefile @@ -11,8 +11,8 @@ OUTPUTS := \ COMMON = gl_and_es_API.xml gl_XML.py glX_XML.py license.py typeexpr.py COMMON := $(addprefix $(GLAPI)/, $(COMMON)) -ES1_APIXML := es1_API.xml -ES2_APIXML := es2_API.xml +ES1_APIXML := $(GLAPI)/gl_and_es_API.xml +ES2_APIXML := $(GLAPI)/gl_and_es_API.xml ES1_OUTPUT_DIR := $(TOP)/src/mapi/es1api ES2_OUTPUT_DIR := $(TOP)/src/mapi/es2api @@ -37,10 +37,12 @@ shared-glapi: $(SHARED_GLAPI_OUTPUTS) $(ES1_OUTPUTS): APIXML := $(ES1_APIXML) $(ES1_OUTPUTS): PRINTER := es1api +$(ES1_OUTPUTS): GLES_VER := es1 $(ES1_OUTPUTS): $(ES1_DEPS) $(ES2_OUTPUTS): APIXML := $(ES2_APIXML) $(ES2_OUTPUTS): PRINTER := es2api +$(ES2_OUTPUTS): GLES_VER := es2 $(ES2_OUTPUTS): $(ES2_DEPS) $(SHARED_GLAPI_OUTPUTS): APIXML := $(SHARED_GLAPI_APIXML) @@ -49,7 +51,7 @@ $(SHARED_GLAPI_OUTPUTS): $(SHARED_GLAPI_DEPS) define gen-glapi @mkdir -p $(dir $@) - $(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) $(1) > $@ + $(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) -c $(GLES_VER) $(1) > $@ endef %/glapi_mapi_tmp.h: $(MAPI)/mapi_abi.py $(COMMON) @@ -58,7 +60,7 @@ endef --printer $(PRINTER) --mode lib $(GLAPI)/gl_and_es_API.xml > $@ %/main/dispatch.h: $(GLAPI)/gl_table.py $(COMMON) - $(call gen-glapi,-c -m remap_table) + $(call gen-glapi,-m remap_table) %/main/remap_helper.h: $(GLAPI)/remap_helper.py $(COMMON) $(call gen-glapi) diff --git a/src/mapi/glapi/gen/Makefile b/src/mapi/glapi/gen/Makefile index 3e101f3a10f..c386b8766c4 100644 --- a/src/mapi/glapi/gen/Makefile +++ 
b/src/mapi/glapi/gen/Makefile @@ -180,10 +180,8 @@ $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON) ###################################################################### -$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON) $(ES_API) - $(PYTHON2) $(PYTHON_FLAGS) $< -f gl_API.xml \ - -f $(MESA_GLAPI_DIR)/gen-es/es1_API.xml \ - -f $(MESA_GLAPI_DIR)/gen-es/es2_API.xml > $@ +$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON_ES) + $(PYTHON2) $(PYTHON_FLAGS) $< -f gl_and_es_API.xml > $@ $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON) $(PYTHON2) $(PYTHON_FLAGS) $< -m remap_table > $@ diff --git a/src/mapi/glapi/gen/gl_table.py b/src/mapi/glapi/gen/gl_table.py index 05979e3813f..2cbbd971a86 100644 --- a/src/mapi/glapi/gen/gl_table.py +++ b/src/mapi/glapi/gen/gl_table.py @@ -211,28 +211,28 @@ class PrintRemapTable(gl_XML.gl_print_base): def show_usage(): - print "Usage: %s [-f input_file_name] [-m mode] [-c]" % sys.argv[0] + print "Usage: %s [-f input_file_name] [-m mode] [-c ver]" % sys.argv[0] print " -m mode Mode can be 'table' or 'remap_table'." - print " -c Enable compatibility with OpenGL ES." + print " -c ver Version can be 'es1' or 'es2'." 
sys.exit(1) if __name__ == '__main__': file_name = "gl_API.xml" try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:m:c") + (args, trail) = getopt.getopt(sys.argv[1:], "f:m:c:") except Exception,e: show_usage() mode = "table" - es = False + es = None for (arg,val) in args: if arg == "-f": file_name = val elif arg == "-m": mode = val elif arg == "-c": - es = True + es = val if mode == "table": printer = PrintGlTable(es) @@ -243,4 +243,14 @@ if __name__ == '__main__': api = gl_XML.parse_GL_API( file_name ) + if es is not None: + import gles_api + + api_map = { + 'es1': gles_api.es1_api, + 'es2': gles_api.es2_api, + } + + api.filter_functions(api_map[es]) + printer.Print( api ) diff --git a/src/mapi/glapi/gen/remap_helper.py b/src/mapi/glapi/gen/remap_helper.py index 69b8e5e9d02..367ae24c75c 100644 --- a/src/mapi/glapi/gen/remap_helper.py +++ b/src/mapi/glapi/gen/remap_helper.py @@ -197,22 +197,36 @@ class PrintGlRemap(gl_XML.gl_print_base): def show_usage(): - print "Usage: %s [-f input_file_name]" % sys.argv[0] + print "Usage: %s [-f input_file_name] [-c ver]" % sys.argv[0] + print " -c ver Version can be 'es1' or 'es2'." 
sys.exit(1) if __name__ == '__main__': file_name = "gl_API.xml" try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:") + (args, trail) = getopt.getopt(sys.argv[1:], "f:c:") except Exception,e: show_usage() + es = None for (arg,val) in args: if arg == "-f": file_name = val + elif arg == "-c": + es = val api = gl_XML.parse_GL_API( file_name ) + if es is not None: + import gles_api + + api_map = { + 'es1': gles_api.es1_api, + 'es2': gles_api.es2_api, + } + + api.filter_functions(api_map[es]) + printer = PrintGlRemap() printer.Print( api ) diff --git a/src/mesa/SConscript b/src/mesa/SConscript index cbd16625186..ac85a3eeb05 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -349,26 +349,26 @@ if env['gles']: gles_headers += env.CodeGenerate( target = 'es1api/main/dispatch.h', script = GLAPI + 'gen/gl_table.py', - source = GLAPI + 'gen-es/es1_API.xml', - command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( target = 'es1api/main/remap_helper.h', script = GLAPI + 'gen/remap_helper.py', - source = GLAPI + 'gen-es/es1_API.xml', - command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( target = 'es2api/main/dispatch.h', script = GLAPI + 'gen/gl_table.py', - source = GLAPI + 'gen-es/es2_API.xml', - command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( target = 'es2api/main/remap_helper.h', script = GLAPI + 'gen/remap_helper.py', - source = GLAPI + 'gen-es/es2_API.xml', - command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET', + source = GLAPI + 
'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET', ) env.Depends(gles_sources, gles_headers) From 786e5a2fb47e39c67a342eb22d84c665d058ee10 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 8 Aug 2011 10:40:12 +0900 Subject: [PATCH 288/600] glapi: add glapi_gen.mk to help header generation glapi_gen.mk is supposed to be included by glapi users to simplify header generation. This commit also makes es1api, es2api, and shared-glapi use it. Reviewed-by: Brian Paul [olv: updated after reviewing to prefix all variables in glapi_gen.mk by glapi_gen] --- src/mapi/es1api/Makefile | 25 ++++++++++--------- src/mapi/glapi/gen/glapi_gen.mk | 44 +++++++++++++++++++++++++++++++++ src/mapi/shared-glapi/Makefile | 9 +++---- 3 files changed, 61 insertions(+), 17 deletions(-) create mode 100644 src/mapi/glapi/gen/glapi_gen.mk diff --git a/src/mapi/es1api/Makefile b/src/mapi/es1api/Makefile index aef694866c2..da5aa45806c 100644 --- a/src/mapi/es1api/Makefile +++ b/src/mapi/es1api/Makefile @@ -34,8 +34,6 @@ ESAPI = $(ES)api GLAPI := $(TOP)/src/mapi/glapi MAPI := $(TOP)/src/mapi/mapi -# directory for generated sources/headers -GEN := glapi esapi_CPPFLAGS := \ -I$(TOP)/include \ @@ -50,7 +48,7 @@ esapi_CPPFLAGS += -DMAPI_MODE_BRIDGE esapi_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(esapi_LIB_DEPS) .PHONY: default -default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME) +default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME) main/dispatch.h main/remap_helper.h $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME): $(esapi_OBJECTS) $(MKLIB) -o $(esapi_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ @@ -68,13 +66,17 @@ lib$(ESAPI).a: $(esapi_OBJECTS) $(esapi_OBJECTS): %.o: $(MAPI)/%.c $(CC) -c $(esapi_CPPFLAGS) $(CFLAGS) $< -o $@ -$(esapi_SOURCES): | glapi-stamp +$(esapi_SOURCES): glapi_mapi_tmp.h -.PHONY: glapi-stamp -glapi-stamp: - @# generate sources/headers - @$(MAKE) -C $(GLAPI)/gen-es $(ES) - @touch $@ +include $(GLAPI)/gen/glapi_gen.mk +glapi_mapi_tmp.h: 
$(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) + $(call glapi_gen_mapi,$<,$(ESAPI)) + +main/dispatch.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_dispatch_deps) + $(call glapi_gen_dispatch,$<,$(ES)) + +main/remap_helper.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_remap_deps) + $(call glapi_gen_remap,$<,$(ES)) .PHONY: clean clean: @@ -83,9 +85,8 @@ clean: -rm -f lib$(ESAPI).a -rm -f $(esapi_OBJECTS) -rm -f depend depend.bak - -rm -f glapi-stamp - @# clean generated sources/headers - @$(MAKE) -C $(GLAPI)/gen-es clean-$(ES) + -rm -f glapi_mapi_tmp.h + -rm -rf main pcedit = \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ diff --git a/src/mapi/glapi/gen/glapi_gen.mk b/src/mapi/glapi/gen/glapi_gen.mk new file mode 100644 index 00000000000..c7fa7c0153d --- /dev/null +++ b/src/mapi/glapi/gen/glapi_gen.mk @@ -0,0 +1,44 @@ +# Helpers for glapi header generation + +ifndef TOP +$(error TOP must be defined.) +endif + +glapi_gen_common_deps := \ + $(wildcard $(TOP)/src/mapi/glapi/gen/*.xml) \ + $(wildcard $(TOP)/src/mapi/glapi/gen/*.py) + +glapi_gen_mapi_script := $(TOP)/src/mapi/mapi/mapi_abi.py +glapi_gen_mapi_deps := \ + $(glapi_gen_mapi_script) \ + $(glapi_gen_common_deps) + +# $(1): path to an XML file +# $(2): name of the printer +define glapi_gen_mapi +@mkdir -p $(dir $@) +$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_mapi_script) \ + --mode lib --printer $(2) $(1) > $@ +endef + +glapi_gen_dispatch_script := $(TOP)/src/mapi/glapi/gen/gl_table.py +glapi_gen_dispatch_deps := $(glapi_gen_common_deps) + +# $(1): path to an XML file +# $(2): empty, es1, or es2 for entry point filtering +define glapi_gen_dispatch +@mkdir -p $(dir $@) +$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_dispatch_script) \ + -f $(1) -m remap_table $(if $(2),-c $(2),) > $@ +endef + +glapi_gen_remap_script := $(TOP)/src/mapi/glapi/gen/remap_helper.py +glapi_gen_remap_deps := $(glapi_gen_common_deps) + +# $(1): path to an XML file +# $(2): empty, es1, or es2 for entry point filtering +define glapi_gen_remap +@mkdir 
-p $(dir $@) +$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_remap_script) \ + -f $(1) $(if $(2),-c $(2),) > $@ +endef diff --git a/src/mapi/shared-glapi/Makefile b/src/mapi/shared-glapi/Makefile index c928f822c81..3de864d891c 100644 --- a/src/mapi/shared-glapi/Makefile +++ b/src/mapi/shared-glapi/Makefile @@ -34,17 +34,16 @@ $(glapi_OBJECTS): %.o: $(MAPI)/%.c $(glapi_SOURCES): glapi_mapi_tmp.h -.PHONY: glapi_mapi_tmp.h -glapi_mapi_tmp.h: - @$(MAKE) -C $(GLAPI)/gen-es shared-glapi +include $(GLAPI)/gen/glapi_gen.mk +glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) + $(call glapi_gen_mapi,$<,shared-glapi) .PHONY: clean clean: -rm -f $(TOP)/$(LIB_DIR)/$(GLAPI_LIB_NAME) -rm -f $(glapi_OBJECTS) -rm -f depend depend.bak - @# clean generated sources/headers - @$(MAKE) -C $(GLAPI)/gen-es clean-shared-glapi + -rm -f glapi_mapi_tmp.h install: $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR) From 6eff33dc7f2cd6e1430bd8dcaef4a7eb9fe3f6ee Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 11 Aug 2011 16:41:09 +0800 Subject: [PATCH 289/600] glapi: generate ES dispatch headers from core mesa GLESv1 and GLESv2 have their own dispatch.h and remap_helper.h. These headers are only used by api_exec_es1.c and api_exec_es2.c in core mesa. Move the rules to generate them from glapi to core mesa. 
Reviewed-by: Brian Paul [olv: updated after reviewing to fix SCons build] --- src/mapi/es1api/Makefile | 9 +-------- src/mesa/Makefile | 20 ++++++++++++++++++++ src/mesa/SConscript | 8 ++++---- src/mesa/main/es_generator.py | 4 ++-- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/mapi/es1api/Makefile b/src/mapi/es1api/Makefile index da5aa45806c..0a0449b10a3 100644 --- a/src/mapi/es1api/Makefile +++ b/src/mapi/es1api/Makefile @@ -48,7 +48,7 @@ esapi_CPPFLAGS += -DMAPI_MODE_BRIDGE esapi_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(esapi_LIB_DEPS) .PHONY: default -default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME) main/dispatch.h main/remap_helper.h +default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME) $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME): $(esapi_OBJECTS) $(MKLIB) -o $(esapi_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ @@ -72,12 +72,6 @@ include $(GLAPI)/gen/glapi_gen.mk glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) $(call glapi_gen_mapi,$<,$(ESAPI)) -main/dispatch.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_dispatch_deps) - $(call glapi_gen_dispatch,$<,$(ES)) - -main/remap_helper.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_remap_deps) - $(call glapi_gen_remap,$<,$(ES)) - .PHONY: clean clean: -rm -f $(esapi_PC) @@ -86,7 +80,6 @@ clean: -rm -f $(esapi_OBJECTS) -rm -f depend depend.bak -rm -f glapi_mapi_tmp.h - -rm -rf main pcedit = \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 88f31b68695..0e15d61bd8d 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -67,6 +67,26 @@ $(DRICORE_OBJ_DIR)/%.o: %.S # then convenience libs (.a) and finally the device drivers: default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs +# include glapi_gen.mk for generating glapi headers for GLES +GLAPI := $(TOP)/src/mapi/glapi/gen +include $(GLAPI)/glapi_gen.mk + +main/api_exec_es1_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps) + $(call 
glapi_gen_dispatch,$<,es1) + +main/api_exec_es1_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps) + $(call glapi_gen_remap,$<,es1) + +main/api_exec_es1.o: main/api_exec_es1_dispatch.h main/api_exec_es1_remap_helper.h + +main/api_exec_es2_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps) + $(call glapi_gen_dispatch,$<,es2) + +main/api_exec_es2_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps) + $(call glapi_gen_remap,$<,es2) + +main/api_exec_es2.o: main/api_exec_es2_dispatch.h main/api_exec_es2_remap_helper.h + main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@ diff --git a/src/mesa/SConscript b/src/mesa/SConscript index ac85a3eeb05..05aa0e8010e 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -347,25 +347,25 @@ if env['gles']: GLAPI = '#src/mapi/glapi/' gles_headers = [] gles_headers += env.CodeGenerate( - target = 'es1api/main/dispatch.h', + target = 'main/api_exec_es1_dispatch.h', script = GLAPI + 'gen/gl_table.py', source = GLAPI + 'gen/gl_and_es_API.xml', command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es1api/main/remap_helper.h', + target = 'main/api_exec_es1_remap_helper.h', script = GLAPI + 'gen/remap_helper.py', source = GLAPI + 'gen/gl_and_es_API.xml', command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es2api/main/dispatch.h', + target = 'main/api_exec_es2_dispatch.h', script = GLAPI + 'gen/gl_table.py', source = GLAPI + 'gen/gl_and_es_API.xml', command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es2api/main/remap_helper.h', + target = 'main/api_exec_es2_remap_helper.h', script = GLAPI + 'gen/remap_helper.py', source = GLAPI + 'gen/gl_and_es_API.xml', command = 
python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET', diff --git a/src/mesa/main/es_generator.py b/src/mesa/main/es_generator.py index c0b0a445806..cad3deaef94 100644 --- a/src/mesa/main/es_generator.py +++ b/src/mesa/main/es_generator.py @@ -681,10 +681,10 @@ print """ #if FEATURE_remap_table /* define esLocalRemapTable */ -#include "%sapi/main/dispatch.h" +#include "main/api_exec_%s_dispatch.h" #define need_MESA_remap_table -#include "%sapi/main/remap_helper.h" +#include "main/api_exec_%s_remap_helper.h" static void init_remap_table(void) From e09b706c9e7d9308ad787e857a9456334426e5fd Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sun, 7 Aug 2011 23:57:51 +0900 Subject: [PATCH 290/600] glapi: remove gen-es Not used anymore. --- src/mapi/glapi/gen-es/Makefile | 93 - src/mapi/glapi/gen-es/base1_API.xml | 744 ------ src/mapi/glapi/gen-es/base2_API.xml | 533 ----- src/mapi/glapi/gen-es/es1_API.xml | 1100 --------- src/mapi/glapi/gen-es/es1_COMPAT.xml | 135 -- src/mapi/glapi/gen-es/es1_EXT.xml | 699 ------ src/mapi/glapi/gen-es/es2_API.xml | 294 --- src/mapi/glapi/gen-es/es2_COMPAT.xml | 368 --- src/mapi/glapi/gen-es/es2_EXT.xml | 162 -- src/mapi/glapi/gen-es/es_COMPAT.xml | 2646 ---------------------- src/mapi/glapi/gen-es/es_EXT.xml | 125 - src/mapi/glapi/gen-es/gl_compare.py | 354 --- src/mapi/glapi/gen-es/gl_parse_header.py | 450 ---- 13 files changed, 7703 deletions(-) delete mode 100644 src/mapi/glapi/gen-es/Makefile delete mode 100644 src/mapi/glapi/gen-es/base1_API.xml delete mode 100644 src/mapi/glapi/gen-es/base2_API.xml delete mode 100644 src/mapi/glapi/gen-es/es1_API.xml delete mode 100644 src/mapi/glapi/gen-es/es1_COMPAT.xml delete mode 100644 src/mapi/glapi/gen-es/es1_EXT.xml delete mode 100644 src/mapi/glapi/gen-es/es2_API.xml delete mode 100644 src/mapi/glapi/gen-es/es2_COMPAT.xml delete mode 100644 src/mapi/glapi/gen-es/es2_EXT.xml delete mode 100644 src/mapi/glapi/gen-es/es_COMPAT.xml delete mode 100644 src/mapi/glapi/gen-es/es_EXT.xml delete mode 
100644 src/mapi/glapi/gen-es/gl_compare.py delete mode 100644 src/mapi/glapi/gen-es/gl_parse_header.py diff --git a/src/mapi/glapi/gen-es/Makefile b/src/mapi/glapi/gen-es/Makefile deleted file mode 100644 index 3fd539d26d5..00000000000 --- a/src/mapi/glapi/gen-es/Makefile +++ /dev/null @@ -1,93 +0,0 @@ -TOP = ../../../.. -MAPI = $(TOP)/src/mapi/mapi -GLAPI = ../gen -include $(TOP)/configs/current - -OUTPUTS := \ - glapi_mapi_tmp.h \ - main/dispatch.h \ - main/remap_helper.h - -COMMON = gl_and_es_API.xml gl_XML.py glX_XML.py license.py typeexpr.py -COMMON := $(addprefix $(GLAPI)/, $(COMMON)) - -ES1_APIXML := $(GLAPI)/gl_and_es_API.xml -ES2_APIXML := $(GLAPI)/gl_and_es_API.xml -ES1_OUTPUT_DIR := $(TOP)/src/mapi/es1api -ES2_OUTPUT_DIR := $(TOP)/src/mapi/es2api - -ES1_DEPS = $(ES1_APIXML) base1_API.xml es1_EXT.xml es_EXT.xml \ - es1_COMPAT.xml es_COMPAT.xml -ES2_DEPS = $(ES2_APIXML) base2_API.xml es2_EXT.xml es_EXT.xml \ - es2_COMPAT.xml es_COMPAT.xml - -ES1_OUTPUTS := $(addprefix $(ES1_OUTPUT_DIR)/, $(OUTPUTS)) -ES2_OUTPUTS := $(addprefix $(ES2_OUTPUT_DIR)/, $(OUTPUTS)) - -SHARED_GLAPI_APIXML := $(GLAPI)/gl_and_es_API.xml -SHARED_GLAPI_OUTPUT_DIR := $(TOP)/src/mapi/shared-glapi -SHARED_GLAPI_DEPS := $(SHARED_GLAPI_APIXML) -SHARED_GLAPI_OUTPUTS = $(SHARED_GLAPI_OUTPUT_DIR)/glapi_mapi_tmp.h - -all: es1 es2 shared-glapi - -es1: $(ES1_OUTPUTS) -es2: $(ES2_OUTPUTS) -shared-glapi: $(SHARED_GLAPI_OUTPUTS) - -$(ES1_OUTPUTS): APIXML := $(ES1_APIXML) -$(ES1_OUTPUTS): PRINTER := es1api -$(ES1_OUTPUTS): GLES_VER := es1 -$(ES1_OUTPUTS): $(ES1_DEPS) - -$(ES2_OUTPUTS): APIXML := $(ES2_APIXML) -$(ES2_OUTPUTS): PRINTER := es2api -$(ES2_OUTPUTS): GLES_VER := es2 -$(ES2_OUTPUTS): $(ES2_DEPS) - -$(SHARED_GLAPI_OUTPUTS): APIXML := $(SHARED_GLAPI_APIXML) -$(SHARED_GLAPI_OUTPUTS): PRINTER := shared-glapi -$(SHARED_GLAPI_OUTPUTS): $(SHARED_GLAPI_DEPS) - -define gen-glapi - @mkdir -p $(dir $@) - $(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) -c $(GLES_VER) $(1) > $@ -endef - -%/glapi_mapi_tmp.h: 
$(MAPI)/mapi_abi.py $(COMMON) - @mkdir -p $(dir $@) - $(PYTHON2) $(PYTHON_FLAGS) $< \ - --printer $(PRINTER) --mode lib $(GLAPI)/gl_and_es_API.xml > $@ - -%/main/dispatch.h: $(GLAPI)/gl_table.py $(COMMON) - $(call gen-glapi,-m remap_table) - -%/main/remap_helper.h: $(GLAPI)/remap_helper.py $(COMMON) - $(call gen-glapi) - -verify_xml: - @if [ ! -f gl.h ]; then \ - echo "Please copy gl.h and gl2.h to this directory"; \ - exit 1; \ - fi - @echo "Verifying that es1_API.xml covers OpenGL ES 1.1..." - @$(PYTHON2) $(PYTHON_FLAGS) gl_parse_header.py gl.h > tmp.xml - @$(PYTHON2) $(PYTHON_FLAGS) gl_compare.py difference tmp.xml es1_API.xml - @echo "Verifying that es2_API.xml covers OpenGL ES 2.0..." - @$(PYTHON2) $(PYTHON_FLAGS) gl_parse_header.py gl2.h > tmp.xml - @$(PYTHON2) $(PYTHON_FLAGS) gl_compare.py difference tmp.xml es2_API.xml - @rm -f tmp.xml - -clean-es1: - -rm -f $(ES1_OUTPUTS) - -rm -rf $(ES1_OUTPUT_DIR)/main - -clean-es2: - -rm -f $(ES2_OUTPUTS) - -rm -rf $(ES2_OUTPUT_DIR)/main - -clean-shared-glapi: - -rm -f $(SHARED_GLAPI_OUTPUTS) - -clean: clean-es1 clean-es2 clean-shared-glapi - -rm -f *~ *.pyc *.pyo diff --git a/src/mapi/glapi/gen-es/base1_API.xml b/src/mapi/glapi/gen-es/base1_API.xml deleted file mode 100644 index 720be257ca2..00000000000 --- a/src/mapi/glapi/gen-es/base1_API.xml +++ /dev/null @@ -1,744 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mapi/glapi/gen-es/base2_API.xml b/src/mapi/glapi/gen-es/base2_API.xml deleted file mode 100644 index b59ef62de64..00000000000 --- a/src/mapi/glapi/gen-es/base2_API.xml +++ /dev/null @@ -1,533 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mapi/glapi/gen-es/es1_API.xml b/src/mapi/glapi/gen-es/es1_API.xml deleted file mode 100644 index 3428ae5616a..00000000000 --- a/src/mapi/glapi/gen-es/es1_API.xml +++ /dev/null @@ -1,1100 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mapi/glapi/gen-es/es1_COMPAT.xml b/src/mapi/glapi/gen-es/es1_COMPAT.xml deleted file mode 100644 index 096cea88d6f..00000000000 --- a/src/mapi/glapi/gen-es/es1_COMPAT.xml +++ /dev/null @@ -1,135 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mapi/glapi/gen-es/es1_EXT.xml b/src/mapi/glapi/gen-es/es1_EXT.xml deleted file mode 100644 index c1e86373d8b..00000000000 --- a/src/mapi/glapi/gen-es/es1_EXT.xml +++ /dev/null @@ -1,699 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mapi/glapi/gen-es/es2_API.xml b/src/mapi/glapi/gen-es/es2_API.xml deleted file mode 100644 index f8af63b94fe..00000000000 --- a/src/mapi/glapi/gen-es/es2_API.xml +++ /dev/null @@ -1,294 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mapi/glapi/gen-es/es2_COMPAT.xml b/src/mapi/glapi/gen-es/es2_COMPAT.xml deleted file mode 100644 index 1bd3569635b..00000000000 --- a/src/mapi/glapi/gen-es/es2_COMPAT.xml +++ /dev/null @@ -1,368 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mapi/glapi/gen-es/es2_EXT.xml b/src/mapi/glapi/gen-es/es2_EXT.xml deleted file mode 100644 index 4a67952e5c3..00000000000 --- a/src/mapi/glapi/gen-es/es2_EXT.xml +++ /dev/null @@ -1,162 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mapi/glapi/gen-es/es_COMPAT.xml b/src/mapi/glapi/gen-es/es_COMPAT.xml deleted file mode 100644 index 7c729261105..00000000000 --- a/src/mapi/glapi/gen-es/es_COMPAT.xml +++ /dev/null @@ -1,2646 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mapi/glapi/gen-es/es_EXT.xml b/src/mapi/glapi/gen-es/es_EXT.xml deleted file mode 100644 index 0013df87e82..00000000000 --- a/src/mapi/glapi/gen-es/es_EXT.xml +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mapi/glapi/gen-es/gl_compare.py b/src/mapi/glapi/gen-es/gl_compare.py deleted file mode 100644 index 6b5e43bb98b..00000000000 --- a/src/mapi/glapi/gen-es/gl_compare.py +++ /dev/null @@ -1,354 +0,0 @@ -#!/usr/bin/python -# -# Copyright (C) 2009 Chia-I Wu -# -# Permission is hereby granted, free of 
charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# on the rights to use, copy, modify, merge, publish, distribute, sub -# license, and/or sell copies of the Software, and to permit persons to whom -# the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. 
- -import sys -import os.path -import getopt - -GLAPI = "../../glapi/gen" -sys.path.append(GLAPI) - -import gl_XML -import glX_XML - -class ApiSet(object): - def __init__(self, api, elts=["enum", "type", "function"]): - self.api = api - self.elts = elts - - def _check_enum(self, e1, e2, strict=True): - if e1.name != e2.name: - raise ValueError("%s: name mismatch" % e1.name) - if e1.value != e2.value: - raise ValueError("%s: value 0x%04x != 0x%04x" - % (e1.name, e1.value, e2.value)) - - def _check_type(self, t1, t2, strict=True): - if t1.name != t2.name: - raise ValueError("%s: name mismatch" % t1.name) - if t1.type_expr.string() != t2.type_expr.string(): - raise ValueError("%s: type %s != %s" - % (t1.name, t1.type_expr.string(), t2.type_expr.string())) - - def _check_function(self, f1, f2, strict=True): - if f1.name != f2.name: - raise ValueError("%s: name mismatch" % f1.name) - if f1.return_type != f2.return_type: - raise ValueError("%s: return type %s != %s" - % (f1.name, f1.return_type, f2.return_type)) - # there might be padded parameters - if strict and len(f1.parameters) != len(f2.parameters): - raise ValueError("%s: parameter length %d != %d" - % (f1.name, len(f1.parameters), len(f2.parameters))) - if f1.assign_offset != f2.assign_offset: - if ((f1.assign_offset and f2.offset < 0) or - (f2.assign_offset and f1.offset < 0)): - raise ValueError("%s: assign offset %d != %d" - % (f1.name, f1.assign_offset, f2.assign_offset)) - elif not f1.assign_offset: - if f1.offset != f2.offset: - raise ValueError("%s: offset %d != %d" - % (f1.name, f1.offset, f2.offset)) - - if strict: - l1 = f1.entry_points - l2 = f2.entry_points - l1.sort() - l2.sort() - if l1 != l2: - raise ValueError("%s: entry points %s != %s" - % (f1.name, l1, l2)) - - l1 = f1.static_entry_points - l2 = f2.static_entry_points - l1.sort() - l2.sort() - if l1 != l2: - raise ValueError("%s: static entry points %s != %s" - % (f1.name, l1, l2)) - - pad = 0 - for i in xrange(len(f1.parameters)): - p1 = 
f1.parameters[i] - p2 = f2.parameters[i + pad] - - if not strict and p1.is_padding != p2.is_padding: - if p1.is_padding: - pad -= 1 - continue - else: - pad += 1 - p2 = f2.parameters[i + pad] - - if strict and p1.name != p2.name: - raise ValueError("%s: parameter %d name %s != %s" - % (f1.name, i, p1.name, p2.name)) - if p1.type_expr.string() != p2.type_expr.string(): - if (strict or - # special case - f1.name == "TexImage2D" and p1.name != "internalformat"): - raise ValueError("%s: parameter %s type %s != %s" - % (f1.name, p1.name, p1.type_expr.string(), - p2.type_expr.string())) - - def union(self, other): - union = gl_XML.gl_api(None) - - if "enum" in self.elts: - union.enums_by_name = other.enums_by_name.copy() - for key, val in self.api.enums_by_name.iteritems(): - if key not in union.enums_by_name: - union.enums_by_name[key] = val - else: - self._check_enum(val, other.enums_by_name[key]) - - if "type" in self.elts: - union.types_by_name = other.types_by_name.copy() - for key, val in self.api.types_by_name.iteritems(): - if key not in union.types_by_name: - union.types_by_name[key] = val - else: - self._check_type(val, other.types_by_name[key]) - - if "function" in self.elts: - union.functions_by_name = other.functions_by_name.copy() - for key, val in self.api.functions_by_name.iteritems(): - if key not in union.functions_by_name: - union.functions_by_name[key] = val - else: - self._check_function(val, other.functions_by_name[key]) - - return union - - def intersection(self, other): - intersection = gl_XML.gl_api(None) - - if "enum" in self.elts: - for key, val in self.api.enums_by_name.iteritems(): - if key in other.enums_by_name: - self._check_enum(val, other.enums_by_name[key]) - intersection.enums_by_name[key] = val - - if "type" in self.elts: - for key, val in self.api.types_by_name.iteritems(): - if key in other.types_by_name: - self._check_type(val, other.types_by_name[key]) - intersection.types_by_name[key] = val - - if "function" in self.elts: - for 
key, val in self.api.functions_by_name.iteritems(): - if key in other.functions_by_name: - self._check_function(val, other.functions_by_name[key]) - intersection.functions_by_name[key] = val - - return intersection - - def difference(self, other): - difference = gl_XML.gl_api(None) - - if "enum" in self.elts: - for key, val in self.api.enums_by_name.iteritems(): - if key not in other.enums_by_name: - difference.enums_by_name[key] = val - else: - self._check_enum(val, other.enums_by_name[key]) - - if "type" in self.elts: - for key, val in self.api.types_by_name.iteritems(): - if key not in other.types_by_name: - difference.types_by_name[key] = val - else: - self._check_type(val, other.types_by_name[key]) - - if "function" in self.elts: - for key, val in self.api.functions_by_name.iteritems(): - if key not in other.functions_by_name: - difference.functions_by_name[key] = val - else: - self._check_function(val, other.functions_by_name[key], False) - - return difference - -def cmp_enum(e1, e2): - if e1.value < e2.value: - return -1 - elif e1.value > e2.value: - return 1 - else: - return 0 - -def cmp_type(t1, t2): - return t1.size - t2.size - -def cmp_function(f1, f2): - if f1.name > f2.name: - return 1 - elif f1.name < f2.name: - return -1 - else: - return 0 - -def spaces(n, str=""): - spaces = n - len(str) - if spaces < 1: - spaces = 1 - return " " * spaces - -def output_enum(e, indent=0): - attrs = 'name="%s"' % e.name - if e.default_count > 0: - tab = spaces(37, attrs) - attrs += '%scount="%d"' % (tab, e.default_count) - tab = spaces(48, attrs) - val = "%04x" % e.value - val = "0x" + val.upper() - attrs += '%svalue="%s"' % (tab, val) - - # no child - if not e.functions: - print '%s' % (spaces(indent), attrs) - return - - print '%s' % (spaces(indent), attrs) - for key, val in e.functions.iteritems(): - attrs = 'name="%s"' % key - if val[0] != e.default_count: - attrs += ' count="%d"' % val[0] - if not val[1]: - attrs += ' mode="get"' - - print '%s' % (spaces(indent * 
2), attrs) - - print '%s' % spaces(indent) - -def output_type(t, indent=0): - tab = spaces(16, t.name) - attrs = 'name="%s"%ssize="%d"' % (t.name, tab, t.size) - ctype = t.type_expr.string() - if ctype.find("unsigned") != -1: - attrs += ' unsigned="true"' - elif ctype.find("signed") == -1: - attrs += ' float="true"' - print '%s' % (spaces(indent), attrs) - -def output_function(f, indent=0): - attrs = 'name="%s"' % f.name - if f.offset > 0: - if f.assign_offset: - attrs += ' offset="assign"' - else: - attrs += ' offset="%d"' % f.offset - print '%s' % (spaces(indent), attrs) - - for p in f.parameters: - attrs = 'name="%s" type="%s"' \ - % (p.name, p.type_expr.original_string) - print '%s' % (spaces(indent * 2), attrs) - if f.return_type != "void": - attrs = 'type="%s"' % f.return_type - print '%s' % (spaces(indent * 2), attrs) - - print '%s' % spaces(indent) - -def output_category(api, indent=0): - enums = api.enums_by_name.values() - enums.sort(cmp_enum) - types = api.types_by_name.values() - types.sort(cmp_type) - functions = api.functions_by_name.values() - functions.sort(cmp_function) - - for e in enums: - output_enum(e, indent) - if enums and types: - print - for t in types: - output_type(t, indent) - if enums or types: - print - for f in functions: - output_function(f, indent) - if f != functions[-1]: - print - -def is_api_empty(api): - return bool(not api.enums_by_name and - not api.types_by_name and - not api.functions_by_name) - -def show_usage(ops): - print "Usage: %s [-k elts] <%s> " % (sys.argv[0], "|".join(ops)) - print " -k elts A comma separated string of types of elements to" - print " skip. Possible types are enum, type, and function." 
- sys.exit(1) - -def main(): - ops = ["union", "intersection", "difference"] - elts = ["enum", "type", "function"] - - try: - options, args = getopt.getopt(sys.argv[1:], "k:") - except Exception, e: - show_usage(ops) - - if len(args) != 3: - show_usage(ops) - op, file1, file2 = args - if op not in ops: - show_usage(ops) - - skips = [] - for opt, val in options: - if opt == "-k": - skips = val.split(",") - - for elt in skips: - try: - elts.remove(elt) - except ValueError: - show_usage(ops) - - api1 = gl_XML.parse_GL_API(file1, glX_XML.glx_item_factory()) - api2 = gl_XML.parse_GL_API(file2, glX_XML.glx_item_factory()) - - set = ApiSet(api1, elts) - func = getattr(set, op) - result = func(api2) - - if not is_api_empty(result): - cat_name = "%s_of_%s_and_%s" \ - % (op, os.path.basename(file1), os.path.basename(file2)) - - print '' - print '' % GLAPI - print - print '' - print - print '' % (cat_name) - output_category(result, 4) - print '' - print - print '' - -if __name__ == "__main__": - main() diff --git a/src/mapi/glapi/gen-es/gl_parse_header.py b/src/mapi/glapi/gen-es/gl_parse_header.py deleted file mode 100644 index 5382eba35c3..00000000000 --- a/src/mapi/glapi/gen-es/gl_parse_header.py +++ /dev/null @@ -1,450 +0,0 @@ -#!/usr/bin/python -# -# Copyright (C) 2009 Chia-I Wu -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# on the rights to use, copy, modify, merge, publish, distribute, sub -# license, and/or sell copies of the Software, and to permit persons to whom -# the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -import sys -import os.path -import getopt -import re - -GLAPI = "../../glapi/gen" -sys.path.append(GLAPI) - -class HeaderParser(object): - """Parser for GL header files.""" - - def __init__(self, verbose=0): - # match #if and #ifdef - self.IFDEF = re.compile('#\s*if(n?def\s+(?P\w+)|\s+(?P.+))') - # match #endif - self.ENDIF = re.compile('#\s*endif') - # match typedef abc def; - self.TYPEDEF = re.compile('typedef\s+(?P[\w ]+)\s+(?P\w+);') - # match #define XYZ VAL - self.DEFINE = re.compile('#\s*define\s+(?P\w+)(?P\s+[\w"]*)?') - # match GLAPI - self.GLAPI = re.compile('^GL_?API(CALL)?\s+(?P[\w\s*]+[\w*])\s+(GL)?_?APIENTRY\s+(?P\w+)\s*\((?P[\w\s(,*\[\])]+)\)\s*;') - - self.split_params = re.compile('\s*,\s*') - self.split_ctype = re.compile('(\W)') - # ignore GL_VERSION_X_Y - self.ignore_enum = re.compile('GL(_ES)?_VERSION(_ES_C[ML])?_\d_\d') - - self.verbose = verbose - self._reset() - - def _reset(self): - """Reset to initial state.""" - self.ifdef_levels = [] - self.need_char = False - - # use typeexpr? 
- def _format_ctype(self, ctype, fix=True): - """Format a ctype string, optionally fix it.""" - # split the type string - tmp = self.split_ctype.split(ctype) - tmp = [s for s in tmp if s and s != " "] - - pretty = "" - for i in xrange(len(tmp)): - # add missing GL prefix - if (fix and tmp[i] != "const" and tmp[i] != "*" and - not tmp[i].startswith("GL")): - tmp[i] = "GL" + tmp[i] - - if i == 0: - pretty = tmp[i] - else: - sep = " " - if tmp[i - 1] == "*": - sep = "" - pretty += sep + tmp[i] - return pretty - - # use typeexpr? - def _get_ctype_attrs(self, ctype): - """Get the attributes of a ctype.""" - is_float = (ctype.find("float") != -1 or ctype.find("double") != -1) - is_signed = not (ctype.find("unsigned") != -1) - - size = 0 - if ctype.find("char") != -1: - size = 1 - elif ctype.find("short") != -1: - size = 2 - elif ctype.find("int") != -1: - size = 4 - elif is_float: - if ctype.find("float") != -1: - size = 4 - else: - size = 8 - - return (size, is_float, is_signed) - - def _parse_define(self, line): - """Parse a #define line for an .""" - m = self.DEFINE.search(line) - if not m: - if self.verbose and line.find("#define") >= 0: - print "ignore %s" % (line) - return None - - key = m.group("key").strip() - val = m.group("value").strip() - - # enum must begin with GL_ and be all uppercase - if ((not (key.startswith("GL_") and key.isupper())) or - (self.ignore_enum.match(key) and val == "1")): - if self.verbose: - print "ignore enum %s" % (key) - return None - - return (key, val) - - def _parse_typedef(self, line): - """Parse a typedef line for a .""" - m = self.TYPEDEF.search(line) - if not m: - if self.verbose and line.find("typedef") >= 0: - print "ignore %s" % (line) - return None - - f = m.group("from").strip() - t = m.group("to").strip() - if not t.startswith("GL"): - if self.verbose: - print "ignore type %s" % (t) - return None - attrs = self._get_ctype_attrs(f) - - return (f, t, attrs) - - def _parse_gl_api(self, line): - """Parse a GLAPI line for a 
.""" - m = self.GLAPI.search(line) - if not m: - if self.verbose and line.find("APIENTRY") >= 0: - print "ignore %s" % (line) - return None - - rettype = m.group("return") - rettype = self._format_ctype(rettype) - if rettype == "GLvoid": - rettype = "" - - name = m.group("name") - - param_str = m.group("params") - chunks = self.split_params.split(param_str) - chunks = [s.strip() for s in chunks] - if len(chunks) == 1 and (chunks[0] == "void" or chunks[0] == "GLvoid"): - chunks = [] - - params = [] - for c in chunks: - # split type and variable name - idx = c.rfind("*") - if idx < 0: - idx = c.rfind(" ") - if idx >= 0: - idx += 1 - ctype = c[:idx] - var = c[idx:] - else: - ctype = c - var = "unnamed" - - # convert array to pointer - idx = var.find("[") - if idx >= 0: - var = var[:idx] - ctype += "*" - - ctype = self._format_ctype(ctype) - var = var.strip() - - if not self.need_char and ctype.find("GLchar") >= 0: - self.need_char = True - - params.append((ctype, var)) - - return (rettype, name, params) - - def _change_level(self, line): - """Parse a #ifdef line and change level.""" - m = self.IFDEF.search(line) - if m: - ifdef = m.group("ifdef") - if not ifdef: - ifdef = m.group("if") - self.ifdef_levels.append(ifdef) - return True - m = self.ENDIF.search(line) - if m: - self.ifdef_levels.pop() - return True - return False - - def _read_header(self, header): - """Open a header file and read its contents.""" - lines = [] - try: - fp = open(header, "rb") - lines = fp.readlines() - fp.close() - except IOError, e: - print "failed to read %s: %s" % (header, e) - return lines - - def _cmp_enum(self, enum1, enum2): - """Compare two enums.""" - # sort by length of the values as strings - val1 = enum1[1] - val2 = enum2[1] - ret = len(val1) - len(val2) - # sort by the values - if not ret: - val1 = int(val1, 16) - val2 = int(val2, 16) - ret = val1 - val2 - # in case int cannot hold the result - if ret > 0: - ret = 1 - elif ret < 0: - ret = -1 - # sort by the names - if not ret: 
- if enum1[0] < enum2[0]: - ret = -1 - elif enum1[0] > enum2[0]: - ret = 1 - return ret - - def _cmp_type(self, type1, type2): - """Compare two types.""" - attrs1 = type1[2] - attrs2 = type2[2] - # sort by type size - ret = attrs1[0] - attrs2[0] - # float is larger - if not ret: - ret = attrs1[1] - attrs2[1] - # signed is larger - if not ret: - ret = attrs1[2] - attrs2[2] - # reverse - ret = -ret - return ret - - def _cmp_function(self, func1, func2): - """Compare two functions.""" - name1 = func1[1] - name2 = func2[1] - ret = 0 - # sort by the names - if name1 < name2: - ret = -1 - elif name1 > name2: - ret = 1 - return ret - - def _postprocess_dict(self, hdict): - """Post-process a header dict and return an ordered list.""" - hlist = [] - largest = 0 - for key, cat in hdict.iteritems(): - size = len(cat["enums"]) + len(cat["types"]) + len(cat["functions"]) - # ignore empty category - if not size: - continue - - cat["enums"].sort(self._cmp_enum) - # remove duplicates - dup = [] - for i in xrange(1, len(cat["enums"])): - if cat["enums"][i] == cat["enums"][i - 1]: - dup.insert(0, i) - for i in dup: - e = cat["enums"].pop(i) - if self.verbose: - print "remove duplicate enum %s" % e[0] - - cat["types"].sort(self._cmp_type) - cat["functions"].sort(self._cmp_function) - - # largest category comes first - if size > largest: - hlist.insert(0, (key, cat)) - largest = size - else: - hlist.append((key, cat)) - return hlist - - def parse(self, header): - """Parse a header file.""" - self._reset() - - if self.verbose: - print "Parsing %s" % (header) - - hdict = {} - lines = self._read_header(header) - for line in lines: - if self._change_level(line): - continue - - # skip until the first ifdef (i.e. 
__gl_h_) - if not self.ifdef_levels: - continue - - cat_name = os.path.basename(header) - # check if we are in an extension - if (len(self.ifdef_levels) > 1 and - self.ifdef_levels[-1].startswith("GL_")): - cat_name = self.ifdef_levels[-1] - - try: - cat = hdict[cat_name] - except KeyError: - cat = { - "enums": [], - "types": [], - "functions": [] - } - hdict[cat_name] = cat - - key = "enums" - elem = self._parse_define(line) - if not elem: - key = "types" - elem = self._parse_typedef(line) - if not elem: - key = "functions" - elem = self._parse_gl_api(line) - - if elem: - cat[key].append(elem) - - if self.need_char: - if self.verbose: - print "define GLchar" - elem = self._parse_typedef("typedef char GLchar;") - cat["types"].append(elem) - return self._postprocess_dict(hdict) - -def spaces(n, str=""): - spaces = n - len(str) - if spaces < 1: - spaces = 1 - return " " * spaces - -def output_xml(name, hlist): - """Output a parsed header in OpenGLAPI XML.""" - - for i in xrange(len(hlist)): - cat_name, cat = hlist[i] - - print '' % (cat_name) - indent = 4 - - for enum in cat["enums"]: - name = enum[0][3:] - value = enum[1] - tab = spaces(41, name) - attrs = 'name="%s"%svalue="%s"' % (name, tab, value) - print '%s' % (spaces(indent), attrs) - - if cat["enums"] and cat["types"]: - print - - for type in cat["types"]: - ctype = type[0] - size, is_float, is_signed = type[2] - - attrs = 'name="%s"' % (type[1][2:]) - attrs += spaces(16, attrs) + 'size="%d"' % (size) - if is_float: - attrs += ' float="true"' - elif not is_signed: - attrs += ' unsigned="true"' - - print '%s' % (spaces(indent), attrs) - - for func in cat["functions"]: - print - ret = func[0] - name = func[1][2:] - params = func[2] - - attrs = 'name="%s" offset="assign"' % name - print '%s' % (spaces(indent), attrs) - - for param in params: - attrs = 'name="%s" type="%s"' % (param[1], param[0]) - print '%s' % (spaces(indent * 2), attrs) - if ret: - attrs = 'type="%s"' % ret - print '%s' % (spaces(indent * 2), 
attrs) - - print '%s' % spaces(indent) - - print '' - print - -def show_usage(): - print "Usage: %s [-v]
..." % sys.argv[0] - sys.exit(1) - -def main(): - try: - args, headers = getopt.getopt(sys.argv[1:], "v") - except Exception, e: - show_usage() - if not headers: - show_usage() - - verbose = 0 - for arg in args: - if arg[0] == "-v": - verbose += 1 - - need_xml_header = True - parser = HeaderParser(verbose) - for h in headers: - h = os.path.abspath(h) - hlist = parser.parse(h) - - if need_xml_header: - print '' - print '' % GLAPI - need_xml_header = False - - print - print '' % (h) - print '' - print - output_xml(h, hlist) - print '' - -if __name__ == '__main__': - main() From f5e757ea60d9abb848d98af01e1986be3e35e236 Mon Sep 17 00:00:00 2001 From: Cooper Yuan Date: Sun, 14 Aug 2011 15:14:17 +0800 Subject: [PATCH 291/600] Destroy context in dri2/glx driver when apps call eglDestroyContext --- src/egl/drivers/dri2/egl_dri2.c | 21 +++++++++++++++++++-- src/egl/drivers/glx/egl_glx.c | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 9a37ea4bbfc..f09ae14f10c 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -725,6 +725,23 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, return NULL; } +/** + * Called via eglDestroyContext(), drv->API.DestroyContext(). + */ +static EGLBoolean +dri2_destroy_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx) +{ + struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + + if (_eglPutContext(ctx)) { + dri2_dpy->core->destroyContext(dri2_ctx->dri_context); + free(dri2_ctx); + } + + return EGL_TRUE; +} + /** * Called via eglMakeCurrent(), drv->API.MakeCurrent(). */ @@ -765,9 +782,8 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, drv->API.DestroySurface(drv, disp, old_dsurf); if (old_rsurf) drv->API.DestroySurface(drv, disp, old_rsurf); - /* no destroy? 
*/ if (old_ctx) - _eglPutContext(old_ctx); + drv->API.DestroyContext(drv, disp, old_ctx); return EGL_TRUE; } else { @@ -1348,6 +1364,7 @@ _EGL_MAIN(const char *args) dri2_drv->base.API.Initialize = dri2_initialize; dri2_drv->base.API.Terminate = dri2_terminate; dri2_drv->base.API.CreateContext = dri2_create_context; + dri2_drv->base.API.DestroyContext = dri2_destroy_context; dri2_drv->base.API.MakeCurrent = dri2_make_current; dri2_drv->base.API.GetProcAddress = dri2_get_proc_address; dri2_drv->base.API.WaitClient = dri2_wait_client; diff --git a/src/egl/drivers/glx/egl_glx.c b/src/egl/drivers/glx/egl_glx.c index 7cf8f4d5514..042936f960e 100644 --- a/src/egl/drivers/glx/egl_glx.c +++ b/src/egl/drivers/glx/egl_glx.c @@ -713,6 +713,24 @@ GLX_eglCreateContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, return &GLX_ctx->Base; } +/** + * Called via eglDestroyContext(), drv->API.DestroyContext(). + */ +static EGLBoolean +GLX_eglDestroyContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx) +{ + struct GLX_egl_driver *GLX_drv = GLX_egl_driver(drv); + struct GLX_egl_context *GLX_ctx = GLX_egl_context(ctx); + + if (_eglPutContext(ctx)) { + assert(GLX_ctx); + GLX_drv->glXDestroyContext(disp, ctx); + + free(GLX_ctx); + } + + return EGL_TRUE; +} /** * Destroy a surface. The display is allowed to be uninitialized. 
@@ -1142,6 +1160,7 @@ _EGL_MAIN(const char *args) GLX_drv->Base.API.Initialize = GLX_eglInitialize; GLX_drv->Base.API.Terminate = GLX_eglTerminate; GLX_drv->Base.API.CreateContext = GLX_eglCreateContext; + GLX_drv->Base.API.DestroyContext = GLX_eglDestroyContext; GLX_drv->Base.API.MakeCurrent = GLX_eglMakeCurrent; GLX_drv->Base.API.CreateWindowSurface = GLX_eglCreateWindowSurface; GLX_drv->Base.API.CreatePixmapSurface = GLX_eglCreatePixmapSurface; From 85fe9484945cb57ffd49df248b0e5057eba6af04 Mon Sep 17 00:00:00 2001 From: Benjamin Franzke Date: Tue, 9 Aug 2011 14:23:18 +0200 Subject: [PATCH 292/600] egl: Native Display autodetection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EGL doesn't define how to manage different native platforms. So mesa has a build-time configurable default platform, with a non-standard envvar (EGL_PLATFORM) override. This caused unneeded bug reports when EGL_PLATFORM was forgotten. Detection is grouped into basic types of NativeDisplays (which itself needs to be detected). The final decision is based on characteristics of these basic types: File Descriptor based platforms (fbdev): - fstat(2) to check for being a fd that belongs to a character device - check kernel subsystem (todo) Pointer to structures (x11, wayland, drm/gbm): - mincore(2) to check whether it's a valid pointer to some memory. - magic elements (e.g. pointers to exported symbols): o wayland display stores interface type pointer (first elm.) o gbm stores pointer to its constructor (first elm.) o x11 as a fallback (FIXME?)
Reviewed-by: Kristian Høgsberg --- configure.ac | 2 + src/egl/main/Makefile | 13 +++++ src/egl/main/eglapi.c | 8 +-- src/egl/main/egldisplay.c | 100 ++++++++++++++++++++++++++++++++++++-- src/egl/main/egldisplay.h | 2 +- 5 files changed, 117 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 1b1823a211f..9195da9b3a3 100644 --- a/configure.ac +++ b/configure.ac @@ -1370,6 +1370,8 @@ if test "x$enable_egl" = xyes; then EGL_LIB_DEPS="$DLOPEN_LIBS $SELINUX_LIBS -lpthread" EGL_DRIVERS_DIRS="" + AC_CHECK_FUNC(mincore, [DEFINES="$DEFINES -DHAVE_MINCORE"]) + if test "$enable_static" != yes; then # build egl_glx when libGL is built if test "x$enable_glx" = xyes; then diff --git a/src/egl/main/Makefile b/src/egl/main/Makefile index 775fbbe178b..c100fbfeb9c 100644 --- a/src/egl/main/Makefile +++ b/src/egl/main/Makefile @@ -93,6 +93,19 @@ ifeq ($(firstword $(EGL_PLATFORMS)),fbdev) EGL_NATIVE_PLATFORM=_EGL_PLATFORM_FBDEV endif +ifneq ($(findstring x11, $(EGL_PLATFORMS)),) +LOCAL_CFLAGS += -DHAVE_X11_PLATFORM +endif +ifneq ($(findstring wayland, $(EGL_PLATFORMS)),) +LOCAL_CFLAGS += -DHAVE_WAYLAND_PLATFORM +endif +ifneq ($(findstring drm, $(EGL_PLATFORMS)),) +LOCAL_CFLAGS += -DHAVE_DRM_PLATFORM +endif +ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) +LOCAL_CFLAGS += -DHAVE_FBDEV_PLATFORM +endif + LOCAL_CFLAGS += \ -D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM) \ -D_EGL_DRIVER_SEARCH_DIR=\"$(EGL_DRIVER_INSTALL_DIR)\" diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 0ba7794e2c9..5d186c60e5a 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -301,7 +301,7 @@ _eglUnlockDisplay(_EGLDisplay *dpy) EGLDisplay EGLAPIENTRY eglGetDisplay(EGLNativeDisplayType nativeDisplay) { - _EGLPlatformType plat = _eglGetNativePlatform(); + _EGLPlatformType plat = _eglGetNativePlatform(nativeDisplay); _EGLDisplay *dpy = _eglFindDisplay(plat, (void *) nativeDisplay); return _eglGetDisplayHandle(dpy); } @@ -538,7 +538,7 @@ 
eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config, EGLSurface ret; _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); - if (disp->Platform != _eglGetNativePlatform()) + if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay)) RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE); surf = drv->API.CreateWindowSurface(drv, disp, conf, window, attrib_list); @@ -559,7 +559,7 @@ eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config, EGLSurface ret; _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); - if (disp->Platform != _eglGetNativePlatform()) + if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay)) RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_NO_SURFACE); surf = drv->API.CreatePixmapSurface(drv, disp, conf, pixmap, attrib_list); @@ -720,7 +720,7 @@ eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target) EGLBoolean ret; _EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv); - if (disp->Platform != _eglGetNativePlatform()) + if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay)) RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_FALSE); ret = drv->API.CopyBuffers(drv, disp, surf, target); diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c index 60f31777272..2849dd96c64 100644 --- a/src/egl/main/egldisplay.c +++ b/src/egl/main/egldisplay.c @@ -43,6 +43,23 @@ #include "eglmutex.h" #include "egllog.h" +/* Includes for _eglNativePlatformDetectNativeDisplay */ +#ifdef HAVE_MINCORE +#include +#include +#endif +#ifdef HAVE_WAYLAND_PLATFORM +#include +#endif +#ifdef HAVE_DRM_PLATFORM +#include +#endif +#ifdef HAVE_FBDEV_PLATFORM +#include +#include +#include +#endif + /** * Return the native platform by parsing EGL_PLATFORM. @@ -83,18 +100,95 @@ _eglGetNativePlatformFromEnv(void) } +/** + * Perform validity checks on a generic pointer. 
+ */ +static EGLBoolean +_eglPointerIsDereferencable(void *p) +{ +#ifdef HAVE_MINCORE + uintptr_t addr = (uintptr_t) p; + unsigned char valid = 0; + const long page_size = getpagesize(); + + if (p == NULL) + return EGL_FALSE; + + /* align addr to page_size */ + addr &= ~(page_size - 1); + + if (mincore((void *) addr, page_size, &valid) < 0) { + _eglLog(_EGL_DEBUG, "mincore failed: %m"); + return EGL_FALSE; + } + + return (valid & 0x01) == 0x01; +#else + return p != NULL; +#endif +} + + +/** + * Try detecting native platform with the help of native display characteristcs. + */ +static _EGLPlatformType +_eglNativePlatformDetectNativeDisplay(EGLNativeDisplayType nativeDisplay) +{ +#ifdef HAVE_FBDEV_PLATFORM + struct stat buf; +#endif + + if (nativeDisplay == EGL_DEFAULT_DISPLAY) + return _EGL_INVALID_PLATFORM; + +#ifdef HAVE_FBDEV_PLATFORM + /* fbdev is the only platform that can be a file descriptor. */ + if (fstat((intptr_t) nativeDisplay, &buf) == 0 && S_ISCHR(buf.st_mode)) + return _EGL_PLATFORM_FBDEV; +#endif + + if (_eglPointerIsDereferencable(nativeDisplay)) { + void *first_pointer = *(void **) nativeDisplay; + +#ifdef HAVE_WAYLAND_PLATFORM + /* wl_display is a wl_proxy, which is a wl_object. + * wl_object's first element points to the interfacetype. */ + if (first_pointer == &wl_display_interface) + return _EGL_PLATFORM_WAYLAND; +#endif + +#ifdef HAVE_DRM_PLATFORM + /* gbm has a pointer to its constructor as first element. */ + if (first_pointer == gbm_create_device) + return _EGL_PLATFORM_DRM; +#endif + +#ifdef HAVE_X11_PLATFORM + /* If not matched to any other platform, fallback to x11. */ + return _EGL_PLATFORM_X11; +#endif + } + + return _EGL_INVALID_PLATFORM; +} + + /** * Return the native platform. It is the platform of the EGL native types. 
*/ _EGLPlatformType -_eglGetNativePlatform(void) +_eglGetNativePlatform(EGLNativeDisplayType nativeDisplay) { static _EGLPlatformType native_platform = _EGL_INVALID_PLATFORM; if (native_platform == _EGL_INVALID_PLATFORM) { native_platform = _eglGetNativePlatformFromEnv(); - if (native_platform == _EGL_INVALID_PLATFORM) - native_platform = _EGL_NATIVE_PLATFORM; + if (native_platform == _EGL_INVALID_PLATFORM) { + native_platform = _eglNativePlatformDetectNativeDisplay(nativeDisplay); + if (native_platform == _EGL_INVALID_PLATFORM) + native_platform = _EGL_NATIVE_PLATFORM; + } } return native_platform; diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 9cd4dbfcc8a..05335ada63c 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -150,7 +150,7 @@ struct _egl_display extern _EGLPlatformType -_eglGetNativePlatform(void); +_eglGetNativePlatform(EGLNativeDisplayType nativeDisplay); extern void From 2e71c7d4ffa439dace639bd9c66174544dcd02d7 Mon Sep 17 00:00:00 2001 From: Benjamin Franzke Date: Tue, 9 Aug 2011 16:16:33 +0200 Subject: [PATCH 293/600] egl: Log (debug) native platform type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Kristian Høgsberg --- src/egl/main/egldisplay.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c index 2849dd96c64..512ad503f35 100644 --- a/src/egl/main/egldisplay.c +++ b/src/egl/main/egldisplay.c @@ -61,23 +61,27 @@ #endif +/** + * Map --with-egl-platforms names to platform types. + */ +static const struct { + _EGLPlatformType platform; + const char *name; +} egl_platforms[_EGL_NUM_PLATFORMS] = { + { _EGL_PLATFORM_WINDOWS, "gdi" }, + { _EGL_PLATFORM_X11, "x11" }, + { _EGL_PLATFORM_WAYLAND, "wayland" }, + { _EGL_PLATFORM_DRM, "drm" }, + { _EGL_PLATFORM_FBDEV, "fbdev" } +}; + + /** * Return the native platform by parsing EGL_PLATFORM. 
*/ static _EGLPlatformType _eglGetNativePlatformFromEnv(void) { - /* map --with-egl-platforms names to platform types */ - static const struct { - _EGLPlatformType platform; - const char *name; - } egl_platforms[_EGL_NUM_PLATFORMS] = { - { _EGL_PLATFORM_WINDOWS, "gdi" }, - { _EGL_PLATFORM_X11, "x11" }, - { _EGL_PLATFORM_WAYLAND, "wayland" }, - { _EGL_PLATFORM_DRM, "drm" }, - { _EGL_PLATFORM_FBDEV, "fbdev" } - }; _EGLPlatformType plat = _EGL_INVALID_PLATFORM; const char *plat_name; EGLint i; @@ -181,16 +185,25 @@ _EGLPlatformType _eglGetNativePlatform(EGLNativeDisplayType nativeDisplay) { static _EGLPlatformType native_platform = _EGL_INVALID_PLATFORM; + char *detection_method = NULL; if (native_platform == _EGL_INVALID_PLATFORM) { native_platform = _eglGetNativePlatformFromEnv(); + detection_method = "environment overwrite"; if (native_platform == _EGL_INVALID_PLATFORM) { native_platform = _eglNativePlatformDetectNativeDisplay(nativeDisplay); - if (native_platform == _EGL_INVALID_PLATFORM) + detection_method = "autodetected"; + if (native_platform == _EGL_INVALID_PLATFORM) { native_platform = _EGL_NATIVE_PLATFORM; + detection_method = "build-time configuration"; + } } } + if (detection_method != NULL) + _eglLog(_EGL_DEBUG, "Native platform type: %s (%s)", + egl_platforms[native_platform].name, detection_method); + return native_platform; } From 5c843785417225ba582878a15a5ae13b7b961aaa Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 1 Aug 2011 13:28:11 -0700 Subject: [PATCH 294/600] glsl: Make move_block_to_cond_assign not care which branch it's processing This will make some future changes a bit easier to digest. 
Reviewed-by: Eric Anholt --- src/glsl/lower_if_to_cond_assign.cpp | 44 +++++++++++----------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp index b637eb4fe1d..2d447e043a5 100644 --- a/src/glsl/lower_if_to_cond_assign.cpp +++ b/src/glsl/lower_if_to_cond_assign.cpp @@ -94,40 +94,23 @@ check_control_flow(ir_instruction *ir, void *data) void move_block_to_cond_assign(void *mem_ctx, - ir_if *if_ir, ir_variable *cond_var, bool then) + ir_if *if_ir, ir_rvalue *cond_expr, + exec_list *instructions) { - exec_list *instructions; - - if (then) { - instructions = &if_ir->then_instructions; - } else { - instructions = &if_ir->else_instructions; - } - foreach_iter(exec_list_iterator, iter, *instructions) { ir_instruction *ir = (ir_instruction *)iter.get(); if (ir->ir_type == ir_type_assignment) { ir_assignment *assign = (ir_assignment *)ir; - ir_rvalue *cond_expr; - ir_dereference *deref = new(mem_ctx) ir_dereference_variable(cond_var); - - if (then) { - cond_expr = deref; - } else { - cond_expr = new(mem_ctx) ir_expression(ir_unop_logic_not, - glsl_type::bool_type, - deref, - NULL); - } if (!assign->condition) { - assign->condition = cond_expr; + assign->condition = cond_expr->clone(mem_ctx, NULL); } else { - assign->condition = new(mem_ctx) ir_expression(ir_binop_logic_and, - glsl_type::bool_type, - cond_expr, - assign->condition); + assign->condition = + new(mem_ctx) ir_expression(ir_binop_logic_and, + glsl_type::bool_type, + cond_expr->clone(mem_ctx, NULL), + assign->condition); } } @@ -187,8 +170,15 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) /* Now, move all of the instructions out of the if blocks, putting * conditions on assignments. 
*/ - move_block_to_cond_assign(mem_ctx, ir, cond_var, true); - move_block_to_cond_assign(mem_ctx, ir, cond_var, false); + move_block_to_cond_assign(mem_ctx, ir, deref, + &ir->then_instructions); + + ir_rvalue *inverse = + new(mem_ctx) ir_expression(ir_unop_logic_not, + glsl_type::bool_type, + deref->clone(mem_ctx, NULL), + NULL); + move_block_to_cond_assign(mem_ctx, ir, inverse, &ir->else_instructions); ir->remove(); From 13df36ecb6c24ea36534fa0dfea917aca8233710 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 1 Aug 2011 13:36:12 -0700 Subject: [PATCH 295/600] glsl: Replace foreach_iter with foreach_list_safe Reviewed-by: Eric Anholt --- src/glsl/lower_if_to_cond_assign.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp index 2d447e043a5..5c74bc1816c 100644 --- a/src/glsl/lower_if_to_cond_assign.cpp +++ b/src/glsl/lower_if_to_cond_assign.cpp @@ -97,8 +97,8 @@ move_block_to_cond_assign(void *mem_ctx, ir_if *if_ir, ir_rvalue *cond_expr, exec_list *instructions) { - foreach_iter(exec_list_iterator, iter, *instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list_safe(node, instructions) { + ir_instruction *ir = (ir_instruction *) node; if (ir->ir_type == ir_type_assignment) { ir_assignment *assign = (ir_assignment *)ir; From 4a026d6ba50034c326eb6cfd0b555f57d83ab609 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 1 Aug 2011 13:55:46 -0700 Subject: [PATCH 296/600] glsl: Slight change to the code generated by if-flattening Now the condition (for the then-clause) and the inverse condition (for the else-clause) get written to separate temporary variables. In the presence of complex conditions, this shouldn't result in more code being generated. If the original if-statement was if (a && b && c && d && e) { ... } else { ... } The lowered code will be if_to_cond_assign_then = a && b && c && d && e; ... 
if_to_cond_assign_else = !if_to_cond_assign_then; ... Reviewed-by: Eric Anholt --- src/glsl/lower_if_to_cond_assign.cpp | 58 ++++++++++++++++++---------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp index 5c74bc1816c..a7097556c14 100644 --- a/src/glsl/lower_if_to_cond_assign.cpp +++ b/src/glsl/lower_if_to_cond_assign.cpp @@ -136,7 +136,6 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) return visit_continue; bool found_control_flow = false; - ir_variable *cond_var; ir_assignment *assign; ir_dereference_variable *deref; @@ -154,31 +153,50 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) void *mem_ctx = ralloc_parent(ir); - /* Store the condition to a variable so the assignment conditions are - * simpler. + /* Store the condition to a variable. Move all of the instructions from + * the then-clause of the if-statement. Use the condition variable as a + * condition for all assignments. */ - cond_var = new(mem_ctx) ir_variable(glsl_type::bool_type, - "if_to_cond_assign_condition", - ir_var_temporary); - ir->insert_before(cond_var); + ir_variable *const then_var = + new(mem_ctx) ir_variable(glsl_type::bool_type, + "if_to_cond_assign_then", + ir_var_temporary); + ir->insert_before(then_var); - deref = new(mem_ctx) ir_dereference_variable(cond_var); - assign = new(mem_ctx) ir_assignment(deref, - ir->condition, NULL); + ir_dereference_variable *then_cond = + new(mem_ctx) ir_dereference_variable(then_var); + + assign = new(mem_ctx) ir_assignment(then_cond, ir->condition); ir->insert_before(assign); - /* Now, move all of the instructions out of the if blocks, putting - * conditions on assignments. 
- */ - move_block_to_cond_assign(mem_ctx, ir, deref, + move_block_to_cond_assign(mem_ctx, ir, then_cond, &ir->then_instructions); - ir_rvalue *inverse = - new(mem_ctx) ir_expression(ir_unop_logic_not, - glsl_type::bool_type, - deref->clone(mem_ctx, NULL), - NULL); - move_block_to_cond_assign(mem_ctx, ir, inverse, &ir->else_instructions); + /* If there are instructions in the else-clause, store the inverse of the + * condition to a variable. Move all of the instructions from the + * else-clause if the if-statement. Use the (inverse) condition variable + * as a condition for all assignments. + */ + if (!ir->else_instructions.is_empty()) { + ir_variable *const else_var = + new(mem_ctx) ir_variable(glsl_type::bool_type, + "if_to_cond_assign_else", + ir_var_temporary); + ir->insert_before(else_var); + + ir_dereference_variable *else_cond = + new(mem_ctx) ir_dereference_variable(else_var); + + ir_rvalue *inverse = + new(mem_ctx) ir_expression(ir_unop_logic_not, + then_cond->clone(mem_ctx, NULL)); + + assign = new(mem_ctx) ir_assignment(else_cond, inverse); + ir->insert_before(assign); + + move_block_to_cond_assign(mem_ctx, ir, else_cond, + &ir->else_instructions); + } ir->remove(); From a352e2d08e0a141298275e77f25541218a97afb7 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 1 Aug 2011 14:13:10 -0700 Subject: [PATCH 297/600] glsl: Modify strategy for accumulating conditions when lowering if-statements Previously if-statements were lowered from inner-most to outer-most (i.e., bottom-up). All assignments within an if-statement would have the condition of the if-statement appended to its existing condition. As a result the assignments from a deeply nested if-statement would have a very long and complex condition. Several shaders in the OpenGL ES2 conformance test suite contain non-constant array indexing that has been lowered by the shader writer. These tests usually look something like: if (i == 0) { value = array[0]; } else if (i == 1) { value = array[1]; } else ... 
The IR for the last assignment ends up as: (assign (expression bool && (expression bool ! (var_ref if_to_cond_assign_condition) ) (expression bool && (expression bool ! (var_ref if_to_cond_assign_condition@20) ) (expression bool && (expression bool ! (var_ref if_to_cond_assign_condition@22) ) (expression bool && (expression bool ! (var_ref if_to_cond_assign_condition@24) ) (var_ref if_to_cond_assign_condition@26) ) ) ) ) (x) (var_ref value) (array_ref (var_ref array) (constant int (5))) The Mesa IR that is generated from this is just as awesome as you might expect. Three changes are made to the way if-statements are lowered. 1. Two condition variables, if_to_cond_assign_then and if_to_cond_assign_else, are created for each if-then-else structure. The former contains the "positive" condition, and the latter contains the "negative" condition. This change was implemented in the previous patch. 2. Each condition variable is added to a hash-table when it is created. 3. When lowering an if-statement, assignments to existing condition variables get the current condition anded. This ensures that nested condition variables are only set to true when the condition variable for all outer if-statements is also true. Changes #1 and #3 combine to ensure the correctness of the resulting code. 4. When a condition assignment is encountered with a condition that is a dereference of a previously added condition variable, the condition is not modified. Change #4 prevents the continuous accumulation of conditions on assignments. If the original if-statements were: if (x) { if (a && b && c && d && e) { ... } else { ... } } else { if (g && h && i && j && k) { ... } else { ... } } The lowered code will be if_to_cond_assign_then@1 = x; if_to_cond_assign_then@2 = a && b && c && d && e && if_to_cond_assign_then@1; ... if_to_cond_assign_else@2 = !if_to_cond_assign_then@2 && if_to_cond_assign_then@1; ...
if_to_cond_assign_else@1 = !if_to_cond_assign_then@1; if_to_cond_assign_then@3 = g && h && i && j; && if_to_cond_assign_else@1; ... if_to_cond_assign_else@3 = !if_to_cond_assign_then && if_to_cond_assign_else@1; ... Depending on how instructions are emitted, there may be an extra instruction due to the duplication of the '&& if_to_cond_assign_{then,else}@1' on the nested else conditions. In addition, this may cause some unnecessary register pressure since in the simple case (where the nested conditions are not complex) the nested then-condition variables are live longer than strictly necessary. Before this change, one of the shaders in the OpenGL ES2 conformance test suite's acos_float_frag_xvary generated 348 Mesa IR instructions. After this change it only generates 124. Many, but not all, of these instructions would have also been eliminated by CSE. Reviewed-by: Eric Anholt --- src/glsl/lower_if_to_cond_assign.cpp | 66 +++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp index a7097556c14..94b24db1b29 100644 --- a/src/glsl/lower_if_to_cond_assign.cpp +++ b/src/glsl/lower_if_to_cond_assign.cpp @@ -47,6 +47,7 @@ #include "glsl_types.h" #include "ir.h" +#include "program/hash_table.h" class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor { public: @@ -55,6 +56,14 @@ public: this->progress = false; this->max_depth = max_depth; this->depth = 0; + + this->condition_variables = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + } + + ~ir_if_to_cond_assign_visitor() + { + hash_table_dtor(this->condition_variables); } ir_visitor_status visit_enter(ir_if *); @@ -63,6 +72,8 @@ public: bool progress; unsigned max_depth; unsigned depth; + + struct hash_table *condition_variables; }; bool @@ -95,7 +106,8 @@ check_control_flow(ir_instruction *ir, void *data) void move_block_to_cond_assign(void *mem_ctx, ir_if *if_ir, 
ir_rvalue *cond_expr, - exec_list *instructions) + exec_list *instructions, + struct hash_table *ht) { foreach_list_safe(node, instructions) { ir_instruction *ir = (ir_instruction *) node; @@ -103,14 +115,33 @@ move_block_to_cond_assign(void *mem_ctx, if (ir->ir_type == ir_type_assignment) { ir_assignment *assign = (ir_assignment *)ir; - if (!assign->condition) { - assign->condition = cond_expr->clone(mem_ctx, NULL); - } else { - assign->condition = - new(mem_ctx) ir_expression(ir_binop_logic_and, - glsl_type::bool_type, - cond_expr->clone(mem_ctx, NULL), - assign->condition); + if (hash_table_find(ht, assign) == NULL) { + hash_table_insert(ht, assign, assign); + + /* If the LHS of the assignment is a condition variable that was + * previously added, insert an additional assignment of false to + * the variable. + */ + const bool assign_to_cv = + hash_table_find(ht, assign->lhs->variable_referenced()) != NULL; + + if (!assign->condition) { + if (assign_to_cv) { + assign->rhs = + new(mem_ctx) ir_expression(ir_binop_logic_and, + glsl_type::bool_type, + cond_expr->clone(mem_ctx, NULL), + assign->rhs); + } else { + assign->condition = cond_expr->clone(mem_ctx, NULL); + } + } else { + assign->condition = + new(mem_ctx) ir_expression(ir_binop_logic_and, + glsl_type::bool_type, + cond_expr->clone(mem_ctx, NULL), + assign->condition); + } } } @@ -125,6 +156,7 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir) { (void) ir; this->depth++; + return visit_continue; } @@ -170,7 +202,13 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) ir->insert_before(assign); move_block_to_cond_assign(mem_ctx, ir, then_cond, - &ir->then_instructions); + &ir->then_instructions, + this->condition_variables); + + /* Add the new condition variable to the hash table. This allows us to + * find this variable when lowering other (enclosing) if-statements. 
+ */ + hash_table_insert(this->condition_variables, then_var, then_var); /* If there are instructions in the else-clause, store the inverse of the * condition to a variable. Move all of the instructions from the @@ -195,7 +233,13 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) ir->insert_before(assign); move_block_to_cond_assign(mem_ctx, ir, else_cond, - &ir->else_instructions); + &ir->else_instructions, + this->condition_variables); + + /* Add the new condition variable to the hash table. This allows us to + * find this variable when lowering other (enclosing) if-statements. + */ + hash_table_insert(this->condition_variables, else_var, else_var); } ir->remove(); From 63720114b4234f5522eb8dee8f4b0c0db561a8c3 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 5 Aug 2011 16:39:56 -0700 Subject: [PATCH 298/600] glw: Remove GLw source. libGLw is an old OpenGL widget library with optional Motif support. It almost never changes and very few people actually still care about it, so we've decided to ship it separately. 
The new home for libGLw is: git://git.freedesktop.org/mesa/glw/ Reviewed-by: Brian Paul --- configs/darwin | 1 - configs/default | 2 +- configs/freebsd-dri | 2 +- configs/linux-cell | 1 - configs/linux-dri-xcb | 2 +- configs/linux-indirect | 2 +- configure.ac | 76 ----- src/glw/GLwDrawA.c | 684 ----------------------------------------- src/glw/GLwDrawA.h | 195 ------------ src/glw/GLwDrawAP.h | 130 -------- src/glw/GLwMDrawA.c | 41 --- src/glw/GLwMDrawA.h | 41 --- src/glw/GLwMDrawAP.h | 41 --- src/glw/Makefile | 74 ----- src/glw/README | 56 ---- src/glw/glw.pc.in | 13 - 16 files changed, 4 insertions(+), 1357 deletions(-) delete mode 100644 src/glw/GLwDrawA.c delete mode 100644 src/glw/GLwDrawA.h delete mode 100644 src/glw/GLwDrawAP.h delete mode 100644 src/glw/GLwMDrawA.c delete mode 100644 src/glw/GLwMDrawA.h delete mode 100644 src/glw/GLwMDrawAP.h delete mode 100644 src/glw/Makefile delete mode 100644 src/glw/README delete mode 100644 src/glw/glw.pc.in diff --git a/configs/darwin b/configs/darwin index 41e7ba800bc..83f417ce0fd 100644 --- a/configs/darwin +++ b/configs/darwin @@ -50,7 +50,6 @@ GLU_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) -L$(INSTALL_DIR)/$(LIB_DIR) -L$(X11_DIR)/$(LIB_DIR) -lX11 -lXt APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L$(INSTALL_DIR)/$(LIB_DIR) -L$(X11_DIR)/$(LIB_DIR) -lX11 -lXmu -lXt -lXi -lm -# omit glw lib for now: SRC_DIRS = glsl mapi/glapi mapi/vgapi glx/apple mesa gallium glu GLU_DIRS = sgi DRIVER_DIRS = osmesa diff --git a/configs/default b/configs/default index e839a1e500a..078c85e8b4d 100644 --- a/configs/default +++ b/configs/default @@ -105,7 +105,7 @@ MOTIF_CFLAGS = -I/usr/include/Motif1.2 # Directories to build LIB_DIR = lib SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \ - gallium egl gallium/winsys gallium/targets glu glw + gallium egl gallium/winsys gallium/targets glu GLU_DIRS = sgi DRIVER_DIRS = x11 osmesa diff --git a/configs/freebsd-dri 
b/configs/freebsd-dri index fdf4b293acd..3c83872c08c 100644 --- a/configs/freebsd-dri +++ b/configs/freebsd-dri @@ -42,7 +42,7 @@ GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -L/usr/local/lib -lGL -lXt -lX11 # Directories -SRC_DIRS = glx gallium mesa glu glw +SRC_DIRS = glx gallium mesa glu DRIVER_DIRS = dri DRM_SOURCE_PATH=$(TOP)/../drm diff --git a/configs/linux-cell b/configs/linux-cell index e87e69a8065..7f38da971d1 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -36,7 +36,6 @@ CFLAGS = $(COMMON_C_CPP_FLAGS) -Wmissing-prototypes -std=c99 CXXFLAGS = $(COMMON_C_CPP_FLAGS) -# Omitting glw here: SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \ gallium gallium/winsys gallium/targets glu diff --git a/configs/linux-dri-xcb b/configs/linux-dri-xcb index 021aa3e8b20..15702da1904 100644 --- a/configs/linux-dri-xcb +++ b/configs/linux-dri-xcb @@ -49,7 +49,7 @@ DRI_LIB_DEPS = $(MESA_MODULES) $(EXTRA_LIB_PATH) -lm -lpthread -lexpat -ldl $(L GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lXxf86vm -lm -lpthread -ldl \ $(LIBDRM_LIB) $(shell pkg-config --libs xcb) $(shell pkg-config --libs x11-xcb) $(shell pkg-config --libs xcb-glx) -SRC_DIRS = glx gallium mesa glu glw +SRC_DIRS = glx gallium mesa glu DRIVER_DIRS = dri DRI_DIRS = i810 i915 mach64 mga r128 r200 r300 radeon \ diff --git a/configs/linux-indirect b/configs/linux-indirect index 82868c4a134..5592a8f2978 100644 --- a/configs/linux-indirect +++ b/configs/linux-indirect @@ -48,5 +48,5 @@ GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lXxf86vm -lm -lpthread -ldl # Directories -SRC_DIRS = glx glu glw +SRC_DIRS = glx glu DRIVER_DIRS = diff --git a/configure.ac b/configure.ac index 9195da9b3a3..6fa5e5177a4 100644 --- a/configure.ac +++ b/configure.ac @@ -359,7 +359,6 @@ fi GL_LIB_NAME='lib$(GL_LIB).'${LIB_EXTENSION} GLU_LIB_NAME='lib$(GLU_LIB).'${LIB_EXTENSION} GLUT_LIB_NAME='lib$(GLUT_LIB).'${LIB_EXTENSION} -GLW_LIB_NAME='lib$(GLW_LIB).'${LIB_EXTENSION} OSMESA_LIB_NAME='lib$(OSMESA_LIB).'${LIB_EXTENSION} 
EGL_LIB_NAME='lib$(EGL_LIB).'${LIB_EXTENSION} GLESv1_CM_LIB_NAME='lib$(GLESv1_CM_LIB).'${LIB_EXTENSION} @@ -372,7 +371,6 @@ GBM_LIB_NAME='lib$(GBM_LIB).'${LIB_EXTENSION} GL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' GLU_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLU_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' GLUT_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLUT_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -GLW_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLW_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' OSMESA_LIB_GLOB=${LIB_PREFIX_GLOB}'$(OSMESA_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' @@ -386,7 +384,6 @@ GBM_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GBM_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTE AC_SUBST([GL_LIB_NAME]) AC_SUBST([GLU_LIB_NAME]) AC_SUBST([GLUT_LIB_NAME]) -AC_SUBST([GLW_LIB_NAME]) AC_SUBST([OSMESA_LIB_NAME]) AC_SUBST([EGL_LIB_NAME]) AC_SUBST([GLESv1_CM_LIB_NAME]) @@ -399,7 +396,6 @@ AC_SUBST([GBM_LIB_NAME]) AC_SUBST([GL_LIB_GLOB]) AC_SUBST([GLU_LIB_GLOB]) AC_SUBST([GLUT_LIB_GLOB]) -AC_SUBST([GLW_LIB_GLOB]) AC_SUBST([OSMESA_LIB_GLOB]) AC_SUBST([EGL_LIB_GLOB]) AC_SUBST([GLESv1_CM_LIB_GLOB]) @@ -1604,77 +1600,6 @@ AC_SUBST([GLU_PC_REQ_PRIV]) AC_SUBST([GLU_PC_LIB_PRIV]) AC_SUBST([GLU_PC_CFLAGS]) -dnl -dnl GLw configuration -dnl -AC_ARG_ENABLE([glw], - [AS_HELP_STRING([--disable-glw], - [enable Xt/Motif widget library @<:@default=enabled@:>@])], - [enable_glw="$enableval"], - [enable_glw=yes]) -dnl Don't build GLw on osmesa -if test "x$enable_glw" = xyes -a "x$enable_glx" = xno; then - AC_MSG_NOTICE([Disabling GLw since there is no OpenGL driver]) - enable_glw=no -fi -AC_ARG_ENABLE([motif], - [AS_HELP_STRING([--enable-motif], - [use Motif widgets in GLw @<:@default=disabled@:>@])], - [enable_motif="$enableval"], - [enable_motif=no]) - -if test 
"x$enable_glw" = xyes; then - SRC_DIRS="$SRC_DIRS glw" - if test "$x11_pkgconfig" = yes; then - PKG_CHECK_MODULES([GLW],[x11 xt]) - GLW_PC_REQ_PRIV="x11 xt" - GLW_LIB_DEPS="$GLW_LIBS" - else - # should check these... - GLW_LIB_DEPS="$X_LIBS -lXt -lX11" - GLW_PC_LIB_PRIV="$GLW_LIB_DEPS" - GLW_PC_CFLAGS="$X11_INCLUDES" - fi - - GLW_SOURCES="GLwDrawA.c" - MOTIF_CFLAGS= - if test "x$enable_motif" = xyes; then - GLW_SOURCES="$GLW_SOURCES GLwMDrawA.c" - AC_PATH_PROG([MOTIF_CONFIG], [motif-config], [no]) - if test "x$MOTIF_CONFIG" != xno; then - MOTIF_CFLAGS=`$MOTIF_CONFIG --cflags` - MOTIF_LIBS=`$MOTIF_CONFIG --libs` - else - AC_CHECK_HEADER([Xm/PrimitiveP.h], [], - [AC_MSG_ERROR([Can't locate Motif headers])]) - AC_CHECK_LIB([Xm], [XmGetPixmap], [MOTIF_LIBS="-lXm"], - [AC_MSG_ERROR([Can't locate Motif Xm library])]) - fi - # MOTIF_LIBS is prepended to GLW_LIB_DEPS since Xm needs Xt/X11 - GLW_LIB_DEPS="$MOTIF_LIBS $GLW_LIB_DEPS" - GLW_PC_LIB_PRIV="$MOTIF_LIBS $GLW_PC_LIB_PRIV" - GLW_PC_CFLAGS="$MOTIF_CFLAGS $GLW_PC_CFLAGS" - fi - - # If static, empty GLW_LIB_DEPS and add libs for programs to link - GLW_PC_LIB_PRIV="$GLW_PC_LIB_PRIV" - if test "$enable_static" = no; then - GLW_MESA_DEPS='-l$(GL_LIB)' - GLW_LIB_DEPS="$GLW_LIB_DEPS" - else - APP_LIB_DEPS="$APP_LIB_DEPS $GLW_LIB_DEPS" - GLW_LIB_DEPS="" - GLW_MESA_DEPS="" - fi -fi -AC_SUBST([GLW_LIB_DEPS]) -AC_SUBST([GLW_MESA_DEPS]) -AC_SUBST([GLW_SOURCES]) -AC_SUBST([MOTIF_CFLAGS]) -AC_SUBST([GLW_PC_REQ_PRIV]) -AC_SUBST([GLW_PC_LIB_PRIV]) -AC_SUBST([GLW_PC_CFLAGS]) - dnl dnl Program library dependencies dnl Only libm is added here if necessary as the libraries should @@ -2053,7 +1978,6 @@ esac echo "" echo " GLU: $enable_glu" -echo " GLw: $enable_glw (Motif: $enable_motif)" dnl EGL echo "" diff --git a/src/glw/GLwDrawA.c b/src/glw/GLwDrawA.c deleted file mode 100644 index 30304a40801..00000000000 --- a/src/glw/GLwDrawA.c +++ /dev/null @@ -1,684 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. 
- * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. 
- */ - -/* - * - * This file has been slightly modified from the original for use with Mesa - * - * Jeroen van der Zijp - * - * jvz@cyberia.cfdrc.com - * - */ -#include -#include -#include -#include -#ifdef __GLX_MOTIF -#include -#include "GLwMDrawAP.h" -#else -#include "GLwDrawAP.h" -#endif -#include -#include - -#ifdef __GLX_MOTIF -#define GLwDrawingAreaWidget GLwMDrawingAreaWidget -#define GLwDrawingAreaClassRec GLwMDrawingAreaClassRec -#define glwDrawingAreaClassRec glwMDrawingAreaClassRec -#define glwDrawingAreaWidgetClass glwMDrawingAreaWidgetClass -#define GLwDrawingAreaRec GLwMDrawingAreaRec -#endif - -#define ATTRIBLIST_SIZE 32 - -#define offset(field) XtOffset(GLwDrawingAreaWidget,glwDrawingArea.field) - - -/* forward definitions */ -static void createColormap(GLwDrawingAreaWidget w,int offset,XrmValue *value); -static void Initialize(GLwDrawingAreaWidget req,GLwDrawingAreaWidget neww,ArgList args,Cardinal *num_args); -static void Realize(Widget w,Mask *valueMask,XSetWindowAttributes *attributes); -static void Redraw(GLwDrawingAreaWidget w,XEvent *event,Region region); -static void Resize(GLwDrawingAreaWidget glw); -static void Destroy(GLwDrawingAreaWidget glw); -static void glwInput(GLwDrawingAreaWidget glw,XEvent *event,String *params,Cardinal *numParams); - - - -static char defaultTranslations[] = -#ifdef __GLX_MOTIF - "osfHelp:PrimitiveHelp() \n" -#endif - ": glwInput() \n\ - : glwInput() \n\ - : glwInput() \n\ - : glwInput() \n\ - : glwInput() "; - - -static XtActionsRec actions[] = { - {"glwInput",(XtActionProc)glwInput}, /* key or mouse input */ - }; - - -/* - * There is a bit of unusual handling of the resources here. - * Because Xt insists on allocating the colormap resource when it is - * processing the core resources (even if we redeclare the colormap - * resource here, we need to do a little trick. 
When Xt first allocates - * the colormap, we allow it to allocate the default one, since we have - * not yet determined the appropriate visual (which is determined from - * resources parsed after the colormap). We also let it allocate colors - * in that default colormap. - * - * In the initialize proc we calculate the actual visual. Then, we - * reobtain the colormap resource using XtGetApplicationResources in - * the initialize proc. If requested, we also reallocate colors in - * that colormap using the same method. - */ - -static XtResource resources[] = { - /* The GLX attributes. Add any new attributes here */ - - {GLwNbufferSize, GLwCBufferSize, XtRInt, sizeof (int), - offset(bufferSize), XtRImmediate, (XtPointer) 0}, - - {GLwNlevel, GLwCLevel, XtRInt, sizeof (int), - offset(level), XtRImmediate, (XtPointer) 0}, - - {GLwNrgba, GLwCRgba, XtRBoolean, sizeof (Boolean), - offset(rgba), XtRImmediate, (XtPointer) FALSE}, - - {GLwNdoublebuffer, GLwCDoublebuffer, XtRBoolean, sizeof (Boolean), - offset(doublebuffer), XtRImmediate, (XtPointer) FALSE}, - - {GLwNstereo, GLwCStereo, XtRBoolean, sizeof (Boolean), - offset(stereo), XtRImmediate, (XtPointer) FALSE}, - - {GLwNauxBuffers, GLwCAuxBuffers, XtRInt, sizeof (int), - offset(auxBuffers), XtRImmediate, (XtPointer) 0}, - - {GLwNredSize, GLwCColorSize, XtRInt, sizeof (int), - offset(redSize), XtRImmediate, (XtPointer) 1}, - - {GLwNgreenSize, GLwCColorSize, XtRInt, sizeof (int), - offset(greenSize), XtRImmediate, (XtPointer) 1}, - - {GLwNblueSize, GLwCColorSize, XtRInt, sizeof (int), - offset(blueSize), XtRImmediate, (XtPointer) 1}, - - {GLwNalphaSize, GLwCAlphaSize, XtRInt, sizeof (int), - offset(alphaSize), XtRImmediate, (XtPointer) 0}, - - {GLwNdepthSize, GLwCDepthSize, XtRInt, sizeof (int), - offset(depthSize), XtRImmediate, (XtPointer) 0}, - - {GLwNstencilSize, GLwCStencilSize, XtRInt, sizeof (int), - offset(stencilSize), XtRImmediate, (XtPointer) 0}, - - {GLwNaccumRedSize, GLwCAccumColorSize, XtRInt, sizeof (int), - 
offset(accumRedSize), XtRImmediate, (XtPointer) 0}, - - {GLwNaccumGreenSize, GLwCAccumColorSize, XtRInt, sizeof (int), - offset(accumGreenSize), XtRImmediate, (XtPointer) 0}, - - {GLwNaccumBlueSize, GLwCAccumColorSize, XtRInt, sizeof (int), - offset(accumBlueSize), XtRImmediate, (XtPointer) 0}, - - {GLwNaccumAlphaSize, GLwCAccumAlphaSize, XtRInt, sizeof (int), - offset(accumAlphaSize), XtRImmediate, (XtPointer) 0}, - - /* the attribute list */ - {GLwNattribList, GLwCAttribList, XtRPointer, sizeof(int *), - offset(attribList), XtRImmediate, (XtPointer) NULL}, - - /* the visual info */ - {GLwNvisualInfo, GLwCVisualInfo, GLwRVisualInfo, sizeof (XVisualInfo *), - offset(visualInfo), XtRImmediate, (XtPointer) NULL}, - - /* miscellaneous resources */ - {GLwNinstallColormap, GLwCInstallColormap, XtRBoolean, sizeof (Boolean), - offset(installColormap), XtRImmediate, (XtPointer) TRUE}, - - {GLwNallocateBackground, GLwCAllocateColors, XtRBoolean, sizeof (Boolean), - offset(allocateBackground), XtRImmediate, (XtPointer) FALSE}, - - {GLwNallocateOtherColors, GLwCAllocateColors, XtRBoolean, sizeof (Boolean), - offset(allocateOtherColors), XtRImmediate, (XtPointer) FALSE}, - - {GLwNinstallBackground, GLwCInstallBackground, XtRBoolean, sizeof (Boolean), - offset(installBackground), XtRImmediate, (XtPointer) TRUE}, - - {GLwNginitCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList), - offset(ginitCallback), XtRImmediate, (XtPointer) NULL}, - - {GLwNinputCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList), - offset(inputCallback), XtRImmediate, (XtPointer) NULL}, - - {GLwNresizeCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList), - offset(resizeCallback), XtRImmediate, (XtPointer) NULL}, - - {GLwNexposeCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList), - offset(exposeCallback), XtRImmediate, (XtPointer) NULL}, - - /* Changes to Motif primitive resources */ -#ifdef __GLX_MOTIF - {XmNtraversalOn, XmCTraversalOn, XmRBoolean, sizeof (Boolean), - 
XtOffset (GLwDrawingAreaWidget, primitive.traversal_on), XmRImmediate, - (XtPointer)FALSE}, - - /* highlighting is normally disabled, as when Motif tries to disable - * highlighting, it tries to reset the color back to the parent's - * background (usually Motif blue). Unfortunately, that is in a - * different colormap, and doesn't work too well. - */ - {XmNhighlightOnEnter, XmCHighlightOnEnter, XmRBoolean, sizeof (Boolean), - XtOffset (GLwDrawingAreaWidget, primitive.highlight_on_enter), - XmRImmediate, (XtPointer) FALSE}, - - {XmNhighlightThickness, XmCHighlightThickness, XmRHorizontalDimension, - sizeof (Dimension), - XtOffset (GLwDrawingAreaWidget, primitive.highlight_thickness), - XmRImmediate, (XtPointer) 0}, -#endif - }; - - -/* -** The following resources are reobtained using XtGetApplicationResources -** in the initialize proc. -*/ - -/* The colormap */ -static XtResource initializeResources[] = { - /* reobtain the colormap with the new visual */ - {XtNcolormap, XtCColormap, XtRColormap, sizeof(Colormap), - XtOffset(GLwDrawingAreaWidget, core.colormap), - XtRCallProc,(XtPointer) createColormap}, - }; - - -/* reallocate any colors we need in the new colormap */ - -/* The background is obtained only if the allocateBackground resource is TRUE*/ -static XtResource backgroundResources[] = { -#ifdef __GLX_MOTIF - {XmNbackground, XmCBackground,XmRPixel, - sizeof(Pixel),XtOffset(GLwDrawingAreaWidget,core.background_pixel), - XmRString,(XtPointer)"lightgrey"}, - /*XmRCallProc,(XtPointer)_XmBackgroundColorDefault},*/ - - {XmNbackgroundPixmap,XmCPixmap,XmRXmBackgroundPixmap, - sizeof(Pixmap),XtOffset(GLwDrawingAreaWidget,core.background_pixmap), - XmRImmediate,(XtPointer)XmUNSPECIFIED_PIXMAP}, - -#else - {XtNbackground,XtCBackground,XtRPixel,sizeof(Pixel), - XtOffset(GLwDrawingAreaWidget,core.background_pixel), - XtRString,(XtPointer)"lightgrey"}, - /*XtRString,(XtPointer)"XtDefaultBackground"},*/ - - {XtNbackgroundPixmap, XtCPixmap, XtRPixmap, sizeof(Pixmap), - 
XtOffset(GLwDrawingAreaWidget,core.background_pixmap), - XtRImmediate,(XtPointer)XtUnspecifiedPixmap}, -#endif - }; - - - -/* The other colors such as the foreground are allocated only if - * allocateOtherColors are set. These resources only exist in Motif. - */ -#ifdef __GLX_MOTIF -static XtResource otherColorResources[] = { - {XmNforeground,XmCForeground,XmRPixel, - sizeof(Pixel),XtOffset(GLwDrawingAreaWidget,primitive.foreground), - XmRString,(XtPointer)"lighgrey"}, - /*XmRCallProc, (XtPointer) _XmForegroundColorDefault},*/ - - {XmNhighlightColor,XmCHighlightColor,XmRPixel,sizeof(Pixel), - XtOffset(GLwDrawingAreaWidget,primitive.highlight_color), - XmRString,(XtPointer)"lightgrey"}, - /*XmRCallProc,(XtPointer)_XmHighlightColorDefault},*/ - - {XmNhighlightPixmap,XmCHighlightPixmap,XmRPrimHighlightPixmap, - sizeof(Pixmap), - XtOffset(GLwDrawingAreaWidget,primitive.highlight_pixmap), - XmRImmediate,(XtPointer)XmUNSPECIFIED_PIXMAP}, - /*XmRCallProc,(XtPointer)_XmPrimitiveHighlightPixmapDefault},*/ - }; -#endif - - -#undef offset - - -GLwDrawingAreaClassRec glwDrawingAreaClassRec = { - { /* core fields */ -#ifdef __GLX_MOTIF - /* superclass */ (WidgetClass) &xmPrimitiveClassRec, - /* class_name */ "GLwMDrawingArea", -#else /* not __GLX_MOTIF */ - /* superclass */ (WidgetClass) &widgetClassRec, - /* class_name */ "GLwDrawingArea", -#endif /* __GLX_MOTIF */ - /* widget_size */ sizeof(GLwDrawingAreaRec), - /* class_initialize */ NULL, - /* class_part_initialize */ NULL, - /* class_inited */ FALSE, - /* initialize */ (XtInitProc) Initialize, - /* initialize_hook */ NULL, - /* realize */ Realize, - /* actions */ actions, - /* num_actions */ XtNumber(actions), - /* resources */ resources, - /* num_resources */ XtNumber(resources), - /* xrm_class */ NULLQUARK, - /* compress_motion */ TRUE, - /* compress_exposure */ TRUE, - /* compress_enterleave */ TRUE, - /* visible_interest */ TRUE, - /* destroy */ (XtWidgetProc) Destroy, - /* resize */ (XtWidgetProc) Resize, - /* expose 
*/ (XtExposeProc) Redraw, - /* set_values */ NULL, - /* set_values_hook */ NULL, - /* set_values_almost */ XtInheritSetValuesAlmost, - /* get_values_hook */ NULL, - /* accept_focus */ NULL, - /* version */ XtVersion, - /* callback_private */ NULL, - /* tm_table */ defaultTranslations, - /* query_geometry */ XtInheritQueryGeometry, - /* display_accelerator */ XtInheritDisplayAccelerator, - /* extension */ NULL - }, -#ifdef __GLX_MOTIF /* primitive resources */ - { - /* border_highlight */ XmInheritBorderHighlight, - /* border_unhighlight */ XmInheritBorderUnhighlight, - /* translations */ XtInheritTranslations, - /* arm_and_activate */ NULL, - /* get_resources */ NULL, - /* num get_resources */ 0, - /* extension */ NULL, - } -#endif - }; - -WidgetClass glwDrawingAreaWidgetClass=(WidgetClass)&glwDrawingAreaClassRec; - - - -static void error(Widget w,char* string){ - char buf[100]; -#ifdef __GLX_MOTIF - sprintf(buf,"GLwMDrawingArea: %s\n",string); -#else - sprintf(buf,"GLwDrawingArea: %s\n",string); -#endif - XtAppError(XtWidgetToApplicationContext(w),buf); - } - - -static void warning(Widget w,char* string){ - char buf[100]; -#ifdef __GLX_MOTIF - sprintf (buf, "GLwMDraw: %s\n", string); -#else - sprintf (buf, "GLwDraw: %s\n", string); -#endif - XtAppWarning(XtWidgetToApplicationContext(w), buf); - } - - - -/* Initialize the attribList based on the attributes */ -static void createAttribList(GLwDrawingAreaWidget w){ - int *ptr; - w->glwDrawingArea.attribList = (int*)XtMalloc(ATTRIBLIST_SIZE*sizeof(int)); - if(!w->glwDrawingArea.attribList){ - error((Widget)w,"Unable to allocate attribute list"); - } - ptr = w->glwDrawingArea.attribList; - *ptr++ = GLX_BUFFER_SIZE; - *ptr++ = w->glwDrawingArea.bufferSize; - *ptr++ = GLX_LEVEL; - *ptr++ = w->glwDrawingArea.level; - if(w->glwDrawingArea.rgba) *ptr++ = GLX_RGBA; - if(w->glwDrawingArea.doublebuffer) *ptr++ = GLX_DOUBLEBUFFER; - if(w->glwDrawingArea.stereo) *ptr++ = GLX_STEREO; - *ptr++ = GLX_AUX_BUFFERS; - *ptr++ = 
w->glwDrawingArea.auxBuffers; - *ptr++ = GLX_RED_SIZE; - *ptr++ = w->glwDrawingArea.redSize; - *ptr++ = GLX_GREEN_SIZE; - *ptr++ = w->glwDrawingArea.greenSize; - *ptr++ = GLX_BLUE_SIZE; - *ptr++ = w->glwDrawingArea.blueSize; - *ptr++ = GLX_ALPHA_SIZE; - *ptr++ = w->glwDrawingArea.alphaSize; - *ptr++ = GLX_DEPTH_SIZE; - *ptr++ = w->glwDrawingArea.depthSize; - *ptr++ = GLX_STENCIL_SIZE; - *ptr++ = w->glwDrawingArea.stencilSize; - *ptr++ = GLX_ACCUM_RED_SIZE; - *ptr++ = w->glwDrawingArea.accumRedSize; - *ptr++ = GLX_ACCUM_GREEN_SIZE; - *ptr++ = w->glwDrawingArea.accumGreenSize; - *ptr++ = GLX_ACCUM_BLUE_SIZE; - *ptr++ = w->glwDrawingArea.accumBlueSize; - *ptr++ = GLX_ACCUM_ALPHA_SIZE; - *ptr++ = w->glwDrawingArea.accumAlphaSize; - *ptr++ = None; - assert((ptr-w->glwDrawingArea.attribList)glwDrawingArea.attribList); - w->glwDrawingArea.visualInfo=glXChooseVisual(XtDisplay(w),XScreenNumberOfScreen(XtScreen(w)),w->glwDrawingArea.attribList); - if(!w->glwDrawingArea.visualInfo) error((Widget)w,"requested visual not supported"); - } - - - -/* Initialize the colormap based on the visual info. - * This routine maintains a cache of visual-infos to colormaps. If two - * widgets share the same visual info, they share the same colormap. - * This function is called by the callProc of the colormap resource entry. - */ -static void createColormap(GLwDrawingAreaWidget w,int offset,XrmValue *value){ - static struct cmapCache { Visual *visual; Colormap cmap; } *cmapCache; - static int cacheEntries=0; - static int cacheMalloced=0; - register int i; - - assert(w->glwDrawingArea.visualInfo); - - /* see if we can find it in the cache */ - for(i=0; iglwDrawingArea.visualInfo->visual){ - value->addr=(XtPointer)(&cmapCache[i].cmap); - return; - } - } - - /* not in the cache, create a new entry */ - if(cacheEntries >= cacheMalloced){ - /* need to malloc a new one. Since we are likely to have only a - * few colormaps, we allocate one the first time, and double - * each subsequent time. 
- */ - if(cacheMalloced==0){ - cacheMalloced=1; - cmapCache=(struct cmapCache*)XtMalloc(sizeof(struct cmapCache)); - } - else{ - cacheMalloced<<=1; - cmapCache=(struct cmapCache*)XtRealloc((char*)cmapCache,sizeof(struct cmapCache)*cacheMalloced); - } - } - - cmapCache[cacheEntries].cmap=XCreateColormap(XtDisplay(w), - RootWindow(XtDisplay(w), - w->glwDrawingArea.visualInfo->screen), - w->glwDrawingArea.visualInfo->visual, - AllocNone); - cmapCache[cacheEntries].visual=w->glwDrawingArea.visualInfo->visual; - value->addr=(XtPointer)(&cmapCache[cacheEntries++].cmap); - } - - - -static void Initialize(GLwDrawingAreaWidget req,GLwDrawingAreaWidget neww,ArgList args,Cardinal *num_args){ - - /* fix size */ - if(req->core.width==0) neww->core.width=100; - if(req->core.height==0) neww->core.width=100; - - /* create the attribute list if needed */ - neww->glwDrawingArea.myList=FALSE; - if(neww->glwDrawingArea.attribList==NULL){ - neww->glwDrawingArea.myList=TRUE; - createAttribList(neww); - } - - /* Gotta have it */ - assert(neww->glwDrawingArea.attribList); - - /* determine the visual info if needed */ - neww->glwDrawingArea.myVisual=FALSE; - if(neww->glwDrawingArea.visualInfo==NULL){ - neww->glwDrawingArea.myVisual=TRUE; - createVisualInfo(neww); - } - - /* Gotta have that too */ - assert(neww->glwDrawingArea.visualInfo); - - neww->core.depth=neww->glwDrawingArea.visualInfo->depth; - - /* Reobtain the colormap and colors in it using XtGetApplicationResources*/ - XtGetApplicationResources((Widget)neww,neww,initializeResources,XtNumber(initializeResources),args,*num_args); - - /* obtain the color resources if appropriate */ - if(req->glwDrawingArea.allocateBackground){ - XtGetApplicationResources((Widget)neww,neww,backgroundResources,XtNumber(backgroundResources),args,*num_args); - } - -#ifdef __GLX_MOTIF - if(req->glwDrawingArea.allocateOtherColors){ - XtGetApplicationResources((Widget)neww,neww,otherColorResources,XtNumber(otherColorResources),args,*num_args); - } -#endif 
- } - - - -static void Realize(Widget w,Mask *valueMask,XSetWindowAttributes *attributes){ - register GLwDrawingAreaWidget glw=(GLwDrawingAreaWidget)w; - GLwDrawingAreaCallbackStruct cb; - Widget parentShell; - Status status; - Window windows[2],*windowsReturn,*windowList; - int countReturn,i; - - /* if we haven't requested that the background be both installed and - * allocated, don't install it. - */ - if(!(glw->glwDrawingArea.installBackground && glw->glwDrawingArea.allocateBackground)){ - *valueMask&=~CWBackPixel; - } - - XtCreateWindow(w,(unsigned int)InputOutput,glw->glwDrawingArea.visualInfo->visual,*valueMask,attributes); - - /* if appropriate, call XSetWMColormapWindows to install the colormap */ - if(glw->glwDrawingArea.installColormap){ - - /* Get parent shell */ - for(parentShell=XtParent(w); parentShell&&!XtIsShell(parentShell); parentShell=XtParent(parentShell)); - - if(parentShell && XtWindow(parentShell)){ - - /* check to see if there is already a property */ - status=XGetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),&windowsReturn,&countReturn); - - /* if no property, just create one */ - if(!status){ - windows[0]=XtWindow(w); - windows[1]=XtWindow(parentShell); - XSetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),windows,2); - } - - /* there was a property, add myself to the beginning */ - else{ - windowList=(Window *)XtMalloc((sizeof(Window))*(countReturn+1)); - windowList[0]=XtWindow(w); - for(i=0; icore.width; - cb.height=glw->core.height; - XtCallCallbackList((Widget)glw,glw->glwDrawingArea.ginitCallback,&cb); - } - - - -static void Redraw(GLwDrawingAreaWidget w,XEvent *event,Region region){ - GLwDrawingAreaCallbackStruct cb; - if(!XtIsRealized((Widget)w)) return; - cb.reason=GLwCR_EXPOSE; - cb.event=event; - cb.width=w->core.width; - cb.height=w->core.height; - XtCallCallbackList((Widget)w,w->glwDrawingArea.exposeCallback,&cb); - } - - - -static void Resize(GLwDrawingAreaWidget glw){ - 
GLwDrawingAreaCallbackStruct cb; - if(!XtIsRealized((Widget)glw)) return; - cb.reason=GLwCR_RESIZE; - cb.event=NULL; - cb.width=glw->core.width; - cb.height=glw->core.height; - XtCallCallbackList((Widget)glw,glw->glwDrawingArea.resizeCallback,&cb); - } - - - -static void Destroy(GLwDrawingAreaWidget glw){ - Window *windowsReturn; - Widget parentShell; - Status status; - int countReturn; - register int i; - - if(glw->glwDrawingArea.myList && glw->glwDrawingArea.attribList){ - XtFree((XtPointer)glw->glwDrawingArea.attribList); - } - - if(glw->glwDrawingArea.myVisual && glw->glwDrawingArea.visualInfo){ - XtFree((XtPointer)glw->glwDrawingArea.visualInfo); - } - - /* if my colormap was installed, remove it */ - if(glw->glwDrawingArea.installColormap){ - - /* Get parent shell */ - for(parentShell=XtParent(glw); parentShell&&!XtIsShell(parentShell); parentShell=XtParent(parentShell)); - - if(parentShell && XtWindow(parentShell)){ - - /* make sure there is a property */ - status=XGetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),&windowsReturn,&countReturn); - - /* if no property, just return. 
If there was a property, continue */ - if(status){ - - /* search for a match */ - for(i=0; icore.width; - cb.height=glw->core.height; - XtCallCallbackList((Widget)glw,glw->glwDrawingArea.inputCallback,&cb); - } - - -#ifdef __GLX_MOTIF - -/* Create routine */ -Widget GLwCreateMDrawingArea(Widget parent, char *name,ArgList arglist,Cardinal argcount){ - return XtCreateWidget(name,glwMDrawingAreaWidgetClass, parent, arglist,argcount); - } - -#endif - - -#ifndef __GLX_MOTIF - -/* Make context current */ -void GLwDrawingAreaMakeCurrent(Widget w,GLXContext ctx){ - glXMakeCurrent(XtDisplay(w),XtWindow(w),ctx); - } - - -/* Swap buffers convenience function */ -void GLwDrawingAreaSwapBuffers(Widget w){ - glXSwapBuffers(XtDisplay(w),XtWindow(w)); - } - -#endif diff --git a/src/glw/GLwDrawA.h b/src/glw/GLwDrawA.h deleted file mode 100644 index b9711c216bc..00000000000 --- a/src/glw/GLwDrawA.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. 
BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. 
- */ -#ifndef _GLwDrawA_h -#define _GLwDrawA_h - -#include -#include - -/**************************************************************** - * - * GLwDrawingArea widgets - * - ****************************************************************/ - -/* Resources: - - Name Class RepType Default Value - ---- ----- ------- ------------- - attribList AttribList int * NULL - visualInfo VisualInfo VisualInfo NULL - installColormap InstallColormap Boolean TRUE - allocateBackground AllocateColors Boolean FALSE - allocateOtherColors AllocateColors Boolean FALSE - installBackground InstallBackground Boolean TRUE - exposeCallback Callback Pointer NULL - ginitCallback Callback Pointer NULL - inputCallback Callback Pointer NULL - resizeCallback Callback Pointer NULL - -*** The following resources all correspond to the GLX configuration -*** attributes and are used to create the attribList if it is NULL - bufferSize BufferSize int 0 - level Level int 0 - rgba Rgba Boolean FALSE - doublebuffer Doublebuffer Boolean FALSE - stereo Stereo Boolean FALSE - auxBuffers AuxBuffers int 0 - redSize ColorSize int 1 - greenSize ColorSize int 1 - blueSize ColorSize int 1 - alphaSize AlphaSize int 0 - depthSize DepthSize int 0 - stencilSize StencilSize int 0 - accumRedSize AccumColorSize int 0 - accumGreenSize AccumColorSize int 0 - accumBlueSize AccumColorSize int 0 - accumAlphaSize AccumAlphaSize int 0 -*/ - -#define GLwNattribList "attribList" -#define GLwCAttribList "AttribList" -#define GLwNvisualInfo "visualInfo" -#define GLwCVisualInfo "VisualInfo" -#define GLwRVisualInfo "VisualInfo" - -#define GLwNinstallColormap "installColormap" -#define GLwCInstallColormap "InstallColormap" -#define GLwNallocateBackground "allocateBackground" -#define GLwNallocateOtherColors "allocateOtherColors" -#define GLwCAllocateColors "AllocateColors" -#define GLwNinstallBackground "installBackground" -#define GLwCInstallBackground "InstallBackground" - -#define GLwCCallback "Callback" -#define GLwNexposeCallback 
"exposeCallback" -#define GLwNginitCallback "ginitCallback" -#define GLwNresizeCallback "resizeCallback" -#define GLwNinputCallback "inputCallback" - -#define GLwNbufferSize "bufferSize" -#define GLwCBufferSize "BufferSize" -#define GLwNlevel "level" -#define GLwCLevel "Level" -#define GLwNrgba "rgba" -#define GLwCRgba "Rgba" -#define GLwNdoublebuffer "doublebuffer" -#define GLwCDoublebuffer "Doublebuffer" -#define GLwNstereo "stereo" -#define GLwCStereo "Stereo" -#define GLwNauxBuffers "auxBuffers" -#define GLwCAuxBuffers "AuxBuffers" -#define GLwNredSize "redSize" -#define GLwNgreenSize "greenSize" -#define GLwNblueSize "blueSize" -#define GLwCColorSize "ColorSize" -#define GLwNalphaSize "alphaSize" -#define GLwCAlphaSize "AlphaSize" -#define GLwNdepthSize "depthSize" -#define GLwCDepthSize "DepthSize" -#define GLwNstencilSize "stencilSize" -#define GLwCStencilSize "StencilSize" -#define GLwNaccumRedSize "accumRedSize" -#define GLwNaccumGreenSize "accumGreenSize" -#define GLwNaccumBlueSize "accumBlueSize" -#define GLwCAccumColorSize "AccumColorSize" -#define GLwNaccumAlphaSize "accumAlphaSize" -#define GLwCAccumAlphaSize "AccumAlphaSize" - -#ifdef __GLX_MOTIF - -typedef struct _GLwMDrawingAreaClassRec *GLwMDrawingAreaWidgetClass; -typedef struct _GLwMDrawingAreaRec *GLwMDrawingAreaWidget; - -GLAPI WidgetClass glwMDrawingAreaWidgetClass; - - -#else - -typedef struct _GLwDrawingAreaClassRec *GLwDrawingAreaWidgetClass; -typedef struct _GLwDrawingAreaRec *GLwDrawingAreaWidget; - -GLAPI WidgetClass glwDrawingAreaWidgetClass; - - -#endif - - -/* Callback reasons */ -#ifdef __GLX_MOTIF -#define GLwCR_EXPOSE XmCR_EXPOSE -#define GLwCR_RESIZE XmCR_RESIZE -#define GLwCR_INPUT XmCR_INPUT -#else -/* The same values as Motif, but don't use Motif constants */ -#define GLwCR_EXPOSE 38 -#define GLwCR_RESIZE 39 -#define GLwCR_INPUT 40 -#endif - -#define GLwCR_GINIT 32135 /* Arbitrary number that should neverr clash */ - -typedef struct - { - int reason; - XEvent *event; - 
Dimension width,height; - } - GLwDrawingAreaCallbackStruct; - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -/* front ends to glXMakeCurrent and glXSwapBuffers */ -GLAPI void GLwDrawingAreaMakeCurrent(Widget w,GLXContext ctx); -GLAPI void GLwDrawingAreaSwapBuffers(Widget w); - -#ifdef __GLX_MOTIF -#ifdef _NO_PROTO -GLAPI Widget GLwCreateMDrawingArea(); -#else -GLAPI Widget GLwCreateMDrawingArea(Widget parent,char *name,ArgList arglist,Cardinal argcount); -#endif -#endif - -#if defined(__cplusplus) || defined(c_plusplus) -} -#endif - -#endif diff --git a/src/glw/GLwDrawAP.h b/src/glw/GLwDrawAP.h deleted file mode 100644 index 4ff21b426dd..00000000000 --- a/src/glw/GLwDrawAP.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. 
HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. - */ -#ifndef _GLwDrawAP_h -#define _GLwDrawAP_h - - -/* MOTIF */ -#ifdef __GLX_MOTIF -#include "GLwMDrawA.h" -#else -#include "GLwDrawA.h" -#endif - -typedef struct _GLwDrawingAreaClassPart { - caddr_t extension; - } GLwDrawingAreaClassPart; - - -#ifdef __GLX_MOTIF -typedef struct _GLwMDrawingAreaClassRec { - CoreClassPart core_class; - XmPrimitiveClassPart primitive_class; - GLwDrawingAreaClassPart glwDrawingArea_class; - } GLwMDrawingAreaClassRec; - - -GLAPI GLwMDrawingAreaClassRec glwMDrawingAreaClassRec; - - -/* XT */ -#else - -typedef struct _GLwDrawingAreaClassRec { - CoreClassPart core_class; - GLwDrawingAreaClassPart glwDrawingArea_class; - } GLwDrawingAreaClassRec; - -GLAPI GLwDrawingAreaClassRec glwDrawingAreaClassRec; - - -#endif - - - -typedef struct { - /* resources */ - int * attribList; - XVisualInfo * visualInfo; - Boolean myList; /* TRUE if we malloced the attribList*/ - Boolean myVisual; /* TRUE if we created the visualInfo*/ - Boolean installColormap; - Boolean allocateBackground; - Boolean allocateOtherColors; - Boolean installBackground; - XtCallbackList ginitCallback; - XtCallbackList resizeCallback; - XtCallbackList 
exposeCallback; - XtCallbackList inputCallback; - /* specific attributes; add as we get new attributes */ - int bufferSize; - int level; - Boolean rgba; - Boolean doublebuffer; - Boolean stereo; - int auxBuffers; - int redSize; - int greenSize; - int blueSize; - int alphaSize; - int depthSize; - int stencilSize; - int accumRedSize; - int accumGreenSize; - int accumBlueSize; - int accumAlphaSize; - } GLwDrawingAreaPart; - -#ifdef __GLX_MOTIF - -typedef struct _GLwMDrawingAreaRec { - CorePart core; - XmPrimitivePart primitive; - GLwDrawingAreaPart glwDrawingArea; - } GLwMDrawingAreaRec; - -#else - -typedef struct _GLwDrawingAreaRec { - CorePart core; - GLwDrawingAreaPart glwDrawingArea; - } GLwDrawingAreaRec; - -#endif - -#endif diff --git a/src/glw/GLwMDrawA.c b/src/glw/GLwMDrawA.c deleted file mode 100644 index bdefe92a6d0..00000000000 --- a/src/glw/GLwMDrawA.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. 
BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. - */ -#ifndef __GLX_MOTIF -#define __GLX_MOTIF 1 -#endif -#include "GLwDrawA.c" diff --git a/src/glw/GLwMDrawA.h b/src/glw/GLwMDrawA.h deleted file mode 100644 index 2e245890410..00000000000 --- a/src/glw/GLwMDrawA.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. 
- * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. - */ -#ifndef __GLX_MOTIF -#define __GLX_MOTIF 1 -#endif -#include "GLwDrawA.h" diff --git a/src/glw/GLwMDrawAP.h b/src/glw/GLwMDrawAP.h deleted file mode 100644 index a0a689bb996..00000000000 --- a/src/glw/GLwMDrawAP.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. 
- * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. 
- */ -#ifndef __GLX_MOTIF -#define __GLX_MOTIF 1 -#endif -#include "GLwDrawAP.h" diff --git a/src/glw/Makefile b/src/glw/Makefile deleted file mode 100644 index 776b1aa5bfb..00000000000 --- a/src/glw/Makefile +++ /dev/null @@ -1,74 +0,0 @@ -# src/glw/Makefile - -TOP = ../.. -include $(TOP)/configs/current - -MAJOR = 1 -MINOR = 0 -TINY = 0 - -INCDIRS = -I$(TOP)/include $(MOTIF_CFLAGS) $(X11_INCLUDES) - - -OBJECTS = $(GLW_SOURCES:.c=.o) - - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCDIRS) $(CFLAGS) $(GLW_CFLAGS) $< - - - -##### TARGETS ##### - -default: $(TOP)/$(LIB_DIR)/$(GLW_LIB_NAME) - -# GLU pkg-config file -pcedit = sed \ - -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ - -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ - -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ - -e 's,@VERSION@,$(MAJOR).$(MINOR).$(TINY),' \ - -e 's,@GLW_PC_REQ_PRIV@,$(GLW_PC_REQ_PRIV),' \ - -e 's,@GLW_PC_LIB_PRIV@,$(GLW_PC_LIB_PRIV),' \ - -e 's,@GLW_PC_CFLAGS@,$(GLW_PC_CFLAGS),' \ - -e 's,@GLW_LIB@,$(GLW_LIB),' -glw.pc: glw.pc.in - $(pcedit) $< > $@ - -install: glw.pc - $(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL - $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR) - $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig - $(INSTALL) -m 644 *.h $(DESTDIR)$(INSTALL_INC_DIR)/GL - $(MINSTALL) $(TOP)/$(LIB_DIR)/$(GLW_LIB_GLOB) $(DESTDIR)$(INSTALL_LIB_DIR) - $(INSTALL) -m 644 glw.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig - -clean: - -rm -f depend depend.bak - -rm -f *.o *.pc *~ - - -# Make the library -$(TOP)/$(LIB_DIR)/$(GLW_LIB_NAME): $(OBJECTS) - $(MKLIB) -o $(GLW_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ - -major $(MAJOR) -minor $(MINOR) -patch $(TINY) \ - $(MKLIB_OPTIONS) -install $(TOP)/$(LIB_DIR) \ - -id $(INSTALL_LIB_DIR)/lib$(GLW_LIB).$(MAJOR).dylib \ - $(GLW_LIB_DEPS) $(OBJECTS) - - -# -# Run 'make depend' to update the dependencies if you change what's included -# by any source file. 
-# -depend: $(GLW_SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) -I$(TOP)/include $(GLW_SOURCES) \ - $(X11_INCLUDES) > /dev/null - - --include depend diff --git a/src/glw/README b/src/glw/README deleted file mode 100644 index 70f4f7bc2ee..00000000000 --- a/src/glw/README +++ /dev/null @@ -1,56 +0,0 @@ - - widgets README file - - -This directory contains the source code for SGI's OpenGL Xt/Motif widgets, -slightly modified by Jeroen van der Zijp to work better with Mesa. - -To compile the widget code (producing lib/libGLw.a) cd to the widgets/ -directory and type 'make ' where is the system configuration -you used to compile Mesa (like 'make linux'). This hasn't been tested on -many systems so let us know if you have trouble. - -If you want to make a Linux ELF shared lib instead of the non-shared .a -file see the notes in the Makefile. - -If you want to build with Motif support, edit Makefile.X11, looking -for the "Motif" information. - -The SGI copyright is as follows. - - - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. 
BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. diff --git a/src/glw/glw.pc.in b/src/glw/glw.pc.in deleted file mode 100644 index 19a7c307c01..00000000000 --- a/src/glw/glw.pc.in +++ /dev/null @@ -1,13 +0,0 @@ -prefix=@INSTALL_DIR@ -exec_prefix=${prefix} -libdir=@INSTALL_LIB_DIR@ -includedir=@INSTALL_INC_DIR@ - -Name: glw -Description: Mesa OpenGL widget library -Requires: gl -Requires.private: @GLW_PC_REQ_PRIV@ -Version: @VERSION@ -Libs: -L${libdir} -l@GLW_LIB@ -Libs.private: @GLW_PC_LIB_PRIV@ -Cflags: -I${includedir} @GLW_PC_CFLAGS@ From c548192cafdf7dfab4cf7e0a0734417ee16f1c94 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 5 Aug 2011 16:59:04 -0700 Subject: [PATCH 299/600] docs: Remove GLw from the documentation except for a new FAQ entry. Also remove an outdated reference to GLEW being in tree. 
Reviewed-by: Brian Paul --- docs/autoconf.html | 7 ------- docs/download.html | 1 - docs/faq.html | 7 ++++++- docs/install.html | 5 ----- docs/sourcetree.html | 2 -- 5 files changed, 6 insertions(+), 16 deletions(-) diff --git a/docs/autoconf.html b/docs/autoconf.html index 64bcbd48a67..895cf665c5b 100644 --- a/docs/autoconf.html +++ b/docs/autoconf.html @@ -20,7 +20,6 @@
  • Library Options
  • Demo Program Options
  • @@ -245,12 +244,6 @@ instructions. on all drivers. This can be disable with the option --disable-glu. - - -
  • GLw - The libGLw library will be built by default -if libGLU has been enabled. This can be disable with the option ---disable-glw. -
  • diff --git a/docs/download.html b/docs/download.html index 3c4d5976c2c..4e8fc2f02f0 100644 --- a/docs/download.html +++ b/docs/download.html @@ -84,7 +84,6 @@ src/mesa - sources for the main Mesa library and device drivers src/gallium - sources for Gallium and Gallium drivers src/glu - libGLU source code src/glx - sources for building libGL with full GLX and DRI support -src/glw - Xt/Motif/OpenGL widget code If you downloaded and unpacked the MesaGLUT.x.y.z package: diff --git a/docs/faq.html b/docs/faq.html index 071381c5a1c..bf6545fd5f7 100644 --- a/docs/faq.html +++ b/docs/faq.html @@ -204,8 +204,13 @@ If you don't already have GLUT installed, you should grab

    +

    2.4 Where is the GLw library?

    +

    +GLw (OpenGL widget library) is now available from a separate git repository. Unless you're using very old Xt/Motif applications with OpenGL, you shouldn't need it. +

    -

    2.4 What's the proper place for the libraries and headers?

    + +

    2.5 What's the proper place for the libraries and headers?

    On Linux-based systems you'll want to follow the libGLU.so lrwxrwxrwx 1 brian users 12 Mar 26 07:53 libglut.so -> libglut.so.3* lrwxrwxrwx 1 brian users 16 Mar 26 07:53 libglut.so.3 -> libglut.so.3.7.1* -rwxr-xr-x 1 brian users 597754 Mar 26 07:53 libglut.so.3.7.1* -lrwxrwxrwx 1 brian users 11 Mar 26 08:04 libGLw.so -> libGLw.so.1* -lrwxrwxrwx 1 brian users 15 Mar 26 08:04 libGLw.so.1 -> libGLw.so.1.0.0* --rwxr-xr-x 1 brian users 20750 Mar 26 08:04 libGLw.so.1.0.0* lrwxrwxrwx 1 brian users 14 Mar 26 07:53 libOSMesa.so -> libOSMesa.so.6* lrwxrwxrwx 1 brian users 23 Mar 26 07:53 libOSMesa.so.6 -> libOSMesa.so.6.1.060100* -rwxr-xr-x 1 brian users 23871 Mar 26 07:53 libOSMesa.so.6.1.060100* @@ -172,8 +169,6 @@ lrwxrwxrwx 1 brian users 23 Mar 26 07:53 libOSMesa.so.6 -> libOSM
    libglut is the GLUT library.
    -libGLw is the Xt/Motif OpenGL drawing area widget library. -
    libOSMesa is the OSMesa (Off-Screen) interface library.

    diff --git a/docs/sourcetree.html b/docs/sourcetree.html index 2e2d1d3f275..713e25b019c 100644 --- a/docs/sourcetree.html +++ b/docs/sourcetree.html @@ -153,8 +153,6 @@ each directory.
  • glx - The GLX library code for building libGL. This is used for direct rendering drivers. It will dynamically load one of the xxx_dri.so drivers. -
  • glw - Widgets for Xt/Motif. -
  • glew - OpenGL Extension Wrangler library (used by demo programs)
  • progs - OpenGL test and demonstration programs
  • lib - where the GL libraries are placed From 67b5a3267d639c31d3ac4073be877ffb0f5637d3 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Tue, 2 Aug 2011 14:34:17 -0700 Subject: [PATCH 300/600] glsl: Perform implicit type conversions on function call out parameters. When an out parameter undergoes an implicit type conversion, we need to store it in a temporary, and then after the call completes, convert the resulting value. In other words, we convert code like the following: void f(out int x); float value; f(value); Into IR that's equivalent to this: void f(out int x); float value; int out_parameter_conversion; f(out_parameter_conversion); value = float(out_parameter_conversion); This transformation needs to happen during ast-to-IR conversion (as opposed to, say, a lowering pass), because it is invalid IR for formal and actual parameters to have types that don't match. Fixes piglit tests spec/glsl-1.20/compiler/qualifiers/out-conversion-int-to-float.vert and spec/glsl-1.20/execution/qualifiers/vs-out-conversion-*.shader_test, and bug 39651. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39651 Reviewed-by: Chad Versace --- src/glsl/ast_function.cpp | 76 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 5b6ed3bc8f5..c49a33d0486 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -134,6 +134,8 @@ match_function_by_name(exec_list *instructions, const char *name, } } + exec_list post_call_conversions; + if (sig != NULL) { /* Verify that 'out' and 'inout' actual parameters are lvalues. This * isn't done in ir_function::matching_signature because that function @@ -141,6 +143,12 @@ match_function_by_name(exec_list *instructions, const char *name, * * Also, validate that 'const_in' formal parameters (an extension of our * IR) correspond to ir_constant actual parameters. + * + * Also, perform implicit conversion of arguments.
Note: to implicitly + * convert out parameters, we need to place them in a temporary + * variable, and do the conversion after the call takes place. Since we + * haven't emitted the call yet, we'll place the post-call conversions + * in a temporary exec_list, and emit them later. */ exec_list_iterator actual_iter = actual_parameters->iterator(); exec_list_iterator formal_iter = sig->parameters.iterator(); @@ -185,8 +193,63 @@ match_function_by_name(exec_list *instructions, const char *name, } if (formal->type->is_numeric() || formal->type->is_boolean()) { - ir_rvalue *converted = convert_component(actual, formal->type); - actual->replace_with(converted); + switch (formal->mode) { + case ir_var_in: { + ir_rvalue *converted + = convert_component(actual, formal->type); + actual->replace_with(converted); + break; + } + case ir_var_out: + if (actual->type != formal->type) { + /* To convert an out parameter, we need to create a + * temporary variable to hold the value before conversion, + * and then perform the conversion after the function call + * returns. 
+ * + * This has the effect of transforming code like this: + * + * void f(out int x); + * float value; + * f(value); + * + * Into IR that's equivalent to this: + * + * void f(out int x); + * float value; + * int out_parameter_conversion; + * f(out_parameter_conversion); + * value = float(out_parameter_conversion); + */ + ir_variable *tmp = + new(ctx) ir_variable(formal->type, + "out_parameter_conversion", + ir_var_temporary); + instructions->push_tail(tmp); + ir_dereference_variable *deref_tmp_1 + = new(ctx) ir_dereference_variable(tmp); + ir_dereference_variable *deref_tmp_2 + = new(ctx) ir_dereference_variable(tmp); + ir_rvalue *converted_tmp + = convert_component(deref_tmp_1, actual->type); + ir_assignment *assignment + = new(ctx) ir_assignment(actual, converted_tmp); + post_call_conversions.push_tail(assignment); + actual->replace_with(deref_tmp_2); + } + break; + case ir_var_inout: + /* Inout parameters should never require conversion, since that + * would require an implicit conversion to exist both to and + * from the formal parameter type, and there are no + * bidirectional implicit conversions. + */ + assert (actual->type == formal->type); + break; + default: + assert (!"Illegal formal parameter mode"); + break; + } } actual_iter.next(); @@ -196,8 +259,11 @@ match_function_by_name(exec_list *instructions, const char *name, /* Always insert the call in the instruction stream, and return a deref * of its return val if it returns a value, since we don't know if * the rvalue is going to be assigned to anything or not. + * + * Also insert any out parameter conversions after the call. 
*/ ir_call *call = new(ctx) ir_call(sig, actual_parameters); + ir_dereference_variable *deref; if (!sig->return_type->is_void()) { /* If the function call is a constant expression, don't * generate the instructions to call it; just generate an @@ -214,7 +280,6 @@ match_function_by_name(exec_list *instructions, const char *name, } ir_variable *var; - ir_dereference_variable *deref; var = new(ctx) ir_variable(sig->return_type, ralloc_asprintf(ctx, "%s_retval", @@ -227,11 +292,12 @@ match_function_by_name(exec_list *instructions, const char *name, instructions->push_tail(assign); deref = new(ctx) ir_dereference_variable(var); - return deref; } else { instructions->push_tail(call); - return NULL; + deref = NULL; } + instructions->append_list(&post_call_conversions); + return deref; } else { char *str = prototype_string(NULL, name, actual_parameters); From a52b53b56e2b5d5853345d8bcd2a4ff50e495c20 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Tue, 2 Aug 2011 15:22:25 -0700 Subject: [PATCH 301/600] glsl: Make is_lvalue() and variable_referenced() const. These functions don't modify the target instruction, so it makes sense to make them const. This allows these functions to be called from ir validation code (which uses const to ensure that it doesn't accidentally modify the IR being validated). 
Reviewed-by: Chad Versace Reviewed-by: Kenneth Graunke --- src/glsl/ir.cpp | 4 ++-- src/glsl/ir.h | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 827fe8e17a7..6f8676ecceb 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -1096,7 +1096,7 @@ ir_dereference_record::ir_dereference_record(ir_variable *var, } bool -ir_dereference::is_lvalue() +ir_dereference::is_lvalue() const { ir_variable *var = this->variable_referenced(); @@ -1310,7 +1310,7 @@ ir_swizzle::create(ir_rvalue *val, const char *str, unsigned vector_length) #undef I ir_variable * -ir_swizzle::variable_referenced() +ir_swizzle::variable_referenced() const { return this->val->variable_referenced(); } diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 50a9d6e1991..04fa97bf56f 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -144,7 +144,7 @@ public: ir_rvalue *as_rvalue_to_saturate(); - virtual bool is_lvalue() + virtual bool is_lvalue() const { return false; } @@ -152,7 +152,7 @@ public: /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced() + virtual ir_variable *variable_referenced() const { return NULL; } @@ -1355,7 +1355,7 @@ public: virtual ir_visitor_status accept(ir_hierarchical_visitor *); - bool is_lvalue() + bool is_lvalue() const { return val->is_lvalue() && !mask.has_duplicates; } @@ -1363,7 +1363,7 @@ public: /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced(); + virtual ir_variable *variable_referenced() const; ir_rvalue *val; ir_swizzle_mask mask; @@ -1387,12 +1387,12 @@ public: return this; } - bool is_lvalue(); + bool is_lvalue() const; /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced() = 0; + virtual ir_variable *variable_referenced() const = 0; }; @@ -1413,7 +1413,7 @@ public: /** * Get the variable that is ultimately 
referenced by an r-value */ - virtual ir_variable *variable_referenced() + virtual ir_variable *variable_referenced() const { return this->var; } @@ -1462,7 +1462,7 @@ public: /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced() + virtual ir_variable *variable_referenced() const { return this->array->variable_referenced(); } @@ -1496,7 +1496,7 @@ public: /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced() + virtual ir_variable *variable_referenced() const { return this->record->variable_referenced(); } From 303e05cc249df3baeb3ed7654b0de00e7b9358fc Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Tue, 2 Aug 2011 15:44:39 -0700 Subject: [PATCH 302/600] glsl: Add validations for ir_call. This patch extends ir_validate.cpp to check the following characteristics of each ir_call: - The number of actual parameters must match the number of formal parameters in the signature. - The type of each actual parameter must match the type of the corresponding formal parameter in the signature. - Each "out" or "inout" actual parameter must be an lvalue. 
Reviewed-by: Chad Versace --- src/glsl/ir_validate.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index f3fceb2a57d..b3ca72ef0c2 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -541,7 +541,43 @@ ir_validate::visit_enter(ir_call *ir) abort(); } + const exec_node *formal_param_node = callee->parameters.head; + const exec_node *actual_param_node = ir->actual_parameters.head; + while (true) { + if (formal_param_node->is_tail_sentinel() + != actual_param_node->is_tail_sentinel()) { + printf("ir_call has the wrong number of parameters:\n"); + goto dump_ir; + } + if (formal_param_node->is_tail_sentinel()) { + break; + } + const ir_variable *formal_param + = (const ir_variable *) formal_param_node; + const ir_rvalue *actual_param + = (const ir_rvalue *) actual_param_node; + if (formal_param->type != actual_param->type) { + printf("ir_call parameter type mismatch:\n"); + goto dump_ir; + } + if (formal_param->mode == ir_var_out + || formal_param->mode == ir_var_inout) { + if (!actual_param->is_lvalue()) { + printf("ir_call out/inout parameters must be lvalues:\n"); + goto dump_ir; + } + } + formal_param_node = formal_param_node->next; + actual_param_node = actual_param_node->next; + } + return visit_continue; + +dump_ir: + ir->print(); + printf("callee:\n"); + callee->print(); + abort(); } void From 5880a9a4a7247e4c31df606bef089c45b4052aaa Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 3 Aug 2011 16:36:42 -0700 Subject: [PATCH 303/600] radeon: Explain to the user what went wrong when built without libdrm. 
Before this commit, even LIBGL_DEBUG=verbose would just fail with: libGL error: failed to create dri screen --- src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index 607b7470d4b..a74c6c7a575 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -78,6 +78,9 @@ static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy) static inline void *radeon_bo_manager_gem_ctor(int fd) { + fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n", + __func__, __LINE__); + return NULL; } @@ -87,6 +90,9 @@ static inline void radeon_bo_manager_gem_dtor(void *dummy) static inline void *radeon_cs_manager_gem_ctor(int fd) { + fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n", + __func__, __LINE__); + return NULL; } From a313c29c777de0bc74c252f1d35eef7d3d907e1f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 21:40:50 -0700 Subject: [PATCH 304/600] glsl: When assigning to a whole array, mark the array as accessed. The vs-varying-array-mat2-col-row-wr test writes a mat2[3] constant to a mat2[3] varying out array, and also statically accesses element 1 of it on the VS and FS sides. At link time it would get trimmed down to just 2 elements, and then codegen of the VS would end up generating assignments to the unallocated last entry of the array. On the new i965 VS backend, that happened to land on the vertex position. Some issues remain in this test on softpipe, i965/old-vs and i965/new-vs on visual inspection, but i965 is passing because only one green pixel is probed, not the whole split green/red quad. 
--- src/glsl/ast_to_hir.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 2025911acd3..9e7496b4b43 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -653,6 +653,16 @@ validate_assignment(struct _mesa_glsl_parse_state *state, return NULL; } +static void +mark_whole_array_access(ir_rvalue *access) +{ + ir_dereference_variable *deref = access->as_dereference_variable(); + + if (deref && deref->var) { + deref->var->max_array_access = deref->type->length - 1; + } +} + ir_rvalue * do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, ir_rvalue *lhs, ir_rvalue *rhs, bool is_initializer, @@ -713,6 +723,7 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, rhs->type->array_size()); d->type = var->type; } + mark_whole_array_access(lhs); } /* Most callers of do_assignment (assign, add_assign, pre_inc/dec, @@ -773,16 +784,6 @@ ast_node::hir(exec_list *instructions, return NULL; } -static void -mark_whole_array_access(ir_rvalue *access) -{ - ir_dereference_variable *deref = access->as_dereference_variable(); - - if (deref) { - deref->var->max_array_access = deref->type->length - 1; - } -} - static ir_rvalue * do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) { From 9b784069ce76e1914eaafac0542458f6a84d9fc0 Mon Sep 17 00:00:00 2001 From: Cooper Yuan Date: Tue, 16 Aug 2011 09:32:10 +0800 Subject: [PATCH 305/600] dri2: add code to dri2_Flush extension. It's going to flush client's commands in eglWaitClient(). Before this, egl applications using pixmap or pbuffer flicker because of no flush. 
Reviewed-by: Alan Hourihane --- src/gallium/state_trackers/dri/drm/dri2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index d491e46ab16..908a735234e 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -46,6 +46,10 @@ static void dri2_flush_drawable(__DRIdrawable *draw) { + struct dri_drawable *drawable = dri_drawable(draw); + struct dri_context *ctx = dri_get_current(draw->driScreenPriv); + + ctx->st->flush(ctx->st, 0, NULL); } static void From 8c409403217cf8c13e1d2dd306ad5e86b566c5c9 Mon Sep 17 00:00:00 2001 From: Benjamin Franzke Date: Mon, 15 Aug 2011 09:50:19 +0200 Subject: [PATCH 306/600] dri2: Add __DRI_BUFFER_COUNT token Remove definition from egl_dri2. Defining this in egl_dri2.h breaks as soon as a new dri2 buffer token is added like with commit 4501a5d6e8d00fd0d87625352ed5ba1a8861f72e. --- include/GL/internal/dri_interface.h | 3 +++ src/egl/drivers/dri2/egl_dri2.h | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index 4fe9e943b55..eed159e11cd 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -694,6 +694,9 @@ struct __DRIswrastExtensionRec { #define __DRI_BUFFER_DEPTH_STENCIL 9 /**< Only available with DRI2 1.1 */ #define __DRI_BUFFER_HIZ 10 +/* Inofficial and for internal use. Increase when adding a new buffer token. 
*/ +#define __DRI_BUFFER_COUNT 11 + struct __DRIbufferRec { unsigned int attachment; unsigned int name; diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index a7297188af2..db93eec14ba 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -123,8 +123,6 @@ enum wayland_buffer_type { WL_BUFFER_THIRD, WL_BUFFER_COUNT }; - -#define __DRI_BUFFER_COUNT 10 #endif enum dri2_surface_type { From ce12f826927cf2d3ac3fd70d893abfb07adc23db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Jul 2011 19:25:07 +0200 Subject: [PATCH 307/600] r600g: first step into winsys/radeon Reviewed-by: Alex Deucher --- configure.ac | 2 +- src/gallium/drivers/r600/r600_pipe.c | 4 +- src/gallium/drivers/r600/r600_pipe.h | 4 +- src/gallium/drivers/r600/r600_public.h | 4 +- src/gallium/targets/dri-r600/Makefile | 1 + src/gallium/targets/dri-r600/target.c | 6 +-- src/gallium/targets/egl-static/Makefile | 3 ++ src/gallium/targets/egl-static/egl_pipe.c | 5 +- src/gallium/targets/gbm/Makefile | 10 +++- src/gallium/targets/gbm/pipe_r600.c | 7 ++- src/gallium/targets/va-r600/Makefile | 1 + src/gallium/targets/va-r600/target.c | 6 +-- src/gallium/targets/vdpau-r600/Makefile | 1 + src/gallium/targets/vdpau-r600/target.c | 6 +-- src/gallium/targets/xvmc-r600/Makefile | 1 + src/gallium/targets/xvmc-r600/target.c | 6 +-- src/gallium/winsys/r600/drm/r600_drm.c | 50 ++++--------------- src/gallium/winsys/r600/drm/r600_drm_public.h | 4 +- src/gallium/winsys/r600/drm/r600_priv.h | 2 + .../winsys/radeon/drm/radeon_drm_winsys.c | 35 ++++++++++--- .../winsys/radeon/drm/radeon_drm_winsys.h | 6 +++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 3 ++ 22 files changed, 93 insertions(+), 74 deletions(-) diff --git a/configure.ac b/configure.ac index 6fa5e5177a4..ea58dae6593 100644 --- a/configure.ac +++ b/configure.ac @@ -1855,7 +1855,7 @@ if test "x$with_gallium_drivers" != x; then ;; xr600) 
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600" - gallium_check_st "r600/drm" "dri-r600" "" "" "xvmc-r600" "vdpau-r600" "va-r600" + gallium_check_st "r600/drm radeon/drm" "dri-r600" "" "" "xvmc-r600" "vdpau-r600" "va-r600" ;; xnouveau) PKG_CHECK_MODULES([NOUVEAU], [libdrm_nouveau >= $LIBDRM_NOUVEAU_REQUIRED]) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 461f59439e8..6181e8b3202 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -47,6 +47,7 @@ #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" +#include "../../winsys/r600/drm/r600_drm_public.h" /* * pipe_context @@ -563,9 +564,10 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen, return TRUE; } -struct pipe_screen *r600_screen_create(struct radeon *radeon) +struct pipe_screen *r600_screen_create(struct radeon_winsys *rw) { struct r600_screen *rscreen; + struct radeon *radeon = r600_drm_winsys_create(rw); rscreen = CALLOC_STRUCT(r600_screen); if (rscreen == NULL) { diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 6f399ed43b0..c53a191594b 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -26,6 +26,8 @@ #ifndef R600_PIPE_H #define R600_PIPE_H +#include "../../winsys/radeon/drm/radeon_winsys.h" + #include #include #include @@ -183,7 +185,7 @@ struct r600_pipe_context { struct r600_pipe_state *states[R600_PIPE_NSTATES]; struct r600_context ctx; struct r600_vertex_element *vertex_elements; - struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS]; + struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS]; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; unsigned cb_target_mask; diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h index f1970201e89..1c82a7af68f 100644 --- a/src/gallium/drivers/r600/r600_public.h +++ 
b/src/gallium/drivers/r600/r600_public.h @@ -23,6 +23,8 @@ #ifndef R600_PUBLIC_H #define R600_PUBLIC_H -struct pipe_screen *r600_screen_create(struct radeon *radeon); +struct radeon_winsys; + +struct pipe_screen *r600_screen_create(struct radeon_winsys *rw); #endif diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile index 0c4de203d35..2d7463008fe 100644 --- a/src/gallium/targets/dri-r600/Makefile +++ b/src/gallium/targets/dri-r600/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/noop/libnoop.a diff --git a/src/gallium/targets/dri-r600/target.c b/src/gallium/targets/dri-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/dri-r600/target.c +++ b/src/gallium/targets/dri-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile index 42d34b8eda1..1583ab181ea 100644 --- a/src/gallium/targets/egl-static/Makefile +++ b/src/gallium/targets/egl-static/Makefile @@ -117,17 +117,20 @@ endif # r300 ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) +ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),) egl_CPPFLAGS += -D_EGL_PIPE_R300=1 egl_LIBS += \ 
$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a endif +endif # r600 ifneq ($(findstring r600/drm,$(GALLIUM_WINSYS_DIRS)),) egl_CPPFLAGS += -D_EGL_PIPE_R600=1 egl_LIBS += \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r600/libr600.a endif diff --git a/src/gallium/targets/egl-static/egl_pipe.c b/src/gallium/targets/egl-static/egl_pipe.c index 658c532b404..f2b50bd0eab 100644 --- a/src/gallium/targets/egl-static/egl_pipe.c +++ b/src/gallium/targets/egl-static/egl_pipe.c @@ -42,7 +42,6 @@ #include "radeon/drm/radeon_drm_public.h" #include "r300/r300_public.h" /* for r600 */ -#include "r600/drm/r600_drm_public.h" #include "r600/r600_public.h" /* for vmwgfx */ #include "svga/drm/svga_drm_public.h" @@ -141,10 +140,10 @@ static struct pipe_screen * pipe_r600_create_screen(int fd) { #if _EGL_PIPE_R600 - struct radeon *rw; + struct radeon_winsys *rw; struct pipe_screen *screen; - rw = r600_drm_winsys_create(fd); + rw = radeon_drm_winsys_create(fd); if (!rw) return NULL; diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile index faacc89f1a0..033a1acaaf9 100644 --- a/src/gallium/targets/gbm/Makefile +++ b/src/gallium/targets/gbm/Makefile @@ -83,6 +83,7 @@ r300_LIBS = \ # r600 pipe driver r600_LIBS = \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r600/libr600.a # vmwgfx pipe driver @@ -90,13 +91,18 @@ vmwgfx_LIBS = \ $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \ $(TOP)/src/gallium/drivers/svga/libsvga.a + + # LLVM ifeq ($(MESA_LLVM),1) -pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a pipe_SYS += $(LLVM_LIBS) pipe_LDFLAGS += $(LLVM_LDFLAGS) endif +ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),) +pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +endif + # determine the 
targets/sources pipe_TARGETS = pipe_SOURCES = @@ -117,9 +123,11 @@ pipe_SOURCES += pipe_nouveau.c endif ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) +ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),) pipe_TARGETS += $(PIPE_PREFIX)r300.so pipe_SOURCES += pipe_r300.c endif +endif ifneq ($(findstring r600/drm,$(GALLIUM_WINSYS_DIRS)),) pipe_TARGETS += $(PIPE_PREFIX)r600.so diff --git a/src/gallium/targets/gbm/pipe_r600.c b/src/gallium/targets/gbm/pipe_r600.c index 486a6592585..9f61a51404a 100644 --- a/src/gallium/targets/gbm/pipe_r600.c +++ b/src/gallium/targets/gbm/pipe_r600.c @@ -1,16 +1,15 @@ - #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen * create_screen(int fd) { - struct radeon *rw; + struct radeon_winsys *rw; struct pipe_screen *screen; - rw = r600_drm_winsys_create(fd); + rw = radeon_drm_winsys_create(fd); if (!rw) return NULL; diff --git a/src/gallium/targets/va-r600/Makefile b/src/gallium/targets/va-r600/Makefile index 28797ad528d..d09a3aa8ad2 100644 --- a/src/gallium/targets/va-r600/Makefile +++ b/src/gallium/targets/va-r600/Makefile @@ -10,6 +10,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/auxiliary/libgallium.a diff --git a/src/gallium/targets/va-r600/target.c b/src/gallium/targets/va-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/va-r600/target.c +++ b/src/gallium/targets/va-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include 
"radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/targets/vdpau-r600/Makefile b/src/gallium/targets/vdpau-r600/Makefile index 0fd817b8e82..c2d95af295a 100644 --- a/src/gallium/targets/vdpau-r600/Makefile +++ b/src/gallium/targets/vdpau-r600/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/auxiliary/libgallium.a diff --git a/src/gallium/targets/vdpau-r600/target.c b/src/gallium/targets/vdpau-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/vdpau-r600/target.c +++ b/src/gallium/targets/vdpau-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/targets/xvmc-r600/Makefile b/src/gallium/targets/xvmc-r600/Makefile index 0bb72f1eff9..a10a42d18ed 100644 --- a/src/gallium/targets/xvmc-r600/Makefile +++ b/src/gallium/targets/xvmc-r600/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ 
$(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/auxiliary/libgallium.a diff --git a/src/gallium/targets/xvmc-r600/target.c b/src/gallium/targets/xvmc-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/xvmc-r600/target.c +++ b/src/gallium/targets/xvmc-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 8aa8c3df52a..325547ab4ec 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -37,6 +37,7 @@ #include "r600_drm_public.h" #include "xf86drm.h" #include "radeon_drm.h" +#include "../../radeon/drm/radeon_winsys.h" #ifndef RADEON_INFO_TILING_CONFIG #define RADEON_INFO_TILING_CONFIG 0x6 @@ -98,20 +99,6 @@ unsigned r600_get_minor_version(struct radeon *radeon) return radeon->minor_version; } - -static int radeon_get_device(struct radeon *radeon) -{ - struct drm_radeon_info info = {}; - int r; - - radeon->device = 0; - info.request = RADEON_INFO_DEVICE_ID; - info.value = (uintptr_t)&radeon->device; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - return r; -} - static int r600_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) { switch ((tiling_config & 0xe) >> 1) { @@ -320,39 +307,22 @@ static int handle_compare(void *key1, void *key2) return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); } -static struct radeon *radeon_new(int fd, unsigned device) +static struct radeon *radeon_new(struct radeon_winsys *rw) { struct 
radeon *radeon; int r; - drmVersionPtr version; radeon = calloc(1, sizeof(*radeon)); if (radeon == NULL) { return NULL; } - radeon->fd = fd; - radeon->device = device; + + rw->query_info(rw, &radeon->info); + radeon->fd = radeon->info.fd; + radeon->device = radeon->info.pci_id; + radeon->num_backends = radeon->info.r600_num_backends; radeon->refcount = 1; - - version = drmGetVersion(radeon->fd); - if (version->version_major != 2) { - fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " - "only compatible with 2.x.x\n", __FUNCTION__, - version->version_major, version->version_minor, - version->version_patchlevel); - drmFreeVersion(version); - exit(1); - } - - radeon->minor_version = version->version_minor; - - drmFreeVersion(version); - - r = radeon_get_device(radeon); - if (r) { - fprintf(stderr, "Failed to get device id\n"); - return radeon_decref(radeon); - } + radeon->minor_version = radeon->info.drm_minor; radeon->family = radeon_family_from_device(radeon->device); if (radeon->family == CHIP_UNKNOWN) { @@ -436,9 +406,9 @@ static struct radeon *radeon_new(int fd, unsigned device) return radeon; } -struct radeon *r600_drm_winsys_create(int drmfd) +struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw) { - return radeon_new(drmfd, 0); + return radeon_new(rw); } struct radeon *radeon_decref(struct radeon *radeon) diff --git a/src/gallium/winsys/r600/drm/r600_drm_public.h b/src/gallium/winsys/r600/drm/r600_drm_public.h index cfce8df9c2c..1d990f91013 100644 --- a/src/gallium/winsys/r600/drm/r600_drm_public.h +++ b/src/gallium/winsys/r600/drm/r600_drm_public.h @@ -26,8 +26,8 @@ #ifndef R600_DRM_PUBLIC_H #define R600_DRM_PUBLIC_H -struct radeon; +struct radeon_winsys; -struct radeon *r600_drm_winsys_create(int drmFD); +struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw); #endif diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 75115fdaed7..7630b30b5f0 100644 --- 
a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -35,6 +35,7 @@ #include "util/u_hash_table.h" #include #include "r600.h" +#include "../../radeon/drm/radeon_winsys.h" #define PKT_COUNT_C 0xC000FFFF #define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) @@ -43,6 +44,7 @@ struct r600_bomgr; struct r600_bo; struct radeon { + struct radeon_info info; int fd; int refcount; unsigned device; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 5983e86c570..faeb66c8908 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -47,6 +47,9 @@ #ifndef RADEON_INFO_WANT_CMASK #define RADEON_INFO_WANT_CMASK 8 #endif +#ifndef RADEON_INFO_NUM_BACKENDS +#define RADEON_INFO_NUM_BACKENDS 10 +#endif /* Enable/disable feature access for one command stream. * If enable == TRUE, return TRUE on success. @@ -175,6 +178,13 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) #define CHIPSET(pci_id, name, family) case pci_id: #include "pci_ids/r300_pci_ids.h" #undef CHIPSET + ws->gen = R300; + break; + +#define CHIPSET(pci_id, name, family) case pci_id: +#include "pci_ids/r600_pci_ids.h" +#undef CHIPSET + ws->gen = R600; break; default: @@ -196,15 +206,23 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); /* Generation-specific queries. 
*/ - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES, - "GB pipe count", - &ws->info.r300_num_gb_pipes)) - return FALSE; + if (ws->gen == R300) { + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES, + "GB pipe count", + &ws->info.r300_num_gb_pipes)) + return FALSE; - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES, - "Z pipe count", - &ws->info.r300_num_z_pipes)) - return FALSE; + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES, + "Z pipe count", + &ws->info.r300_num_z_pipes)) + return FALSE; + } + else if (ws->gen == R600) { + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, + "num backends", + &ws->info.r600_num_backends)) + return FALSE; + } return TRUE; } @@ -263,6 +281,7 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) } ws->fd = fd; + ws->info.fd = fd; if (!do_winsys_init(ws)) goto fail; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index 347e1f1d11a..69216448496 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -33,12 +33,18 @@ #include "radeon_winsys.h" #include "os/os_thread.h" +enum radeon_generation { + R300, + R600 +}; + struct radeon_drm_winsys { struct radeon_winsys base; int fd; /* DRM file descriptor */ int num_cs; /* The number of command streams created. 
*/ + enum radeon_generation gen; struct radeon_info info; struct pb_manager *kman; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 6d52dc25022..2a49e615981 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -73,6 +73,7 @@ struct radeon_info { uint32_t pci_id; uint32_t gart_size; uint32_t vram_size; + uint32_t fd; /* XXX transitional */ uint32_t drm_major; /* version */ uint32_t drm_minor; @@ -80,6 +81,8 @@ struct radeon_info { uint32_t r300_num_gb_pipes; uint32_t r300_num_z_pipes; + + uint32_t r600_num_backends; }; enum radeon_feature_id { From fb8cf51eeb91413e761e0510d1f8c11b8cd0a7ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Jul 2011 20:15:47 +0200 Subject: [PATCH 308/600] r600g: move some queries into winsys/radeon Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_drm.c | 91 +++---------------- src/gallium/winsys/r600/drm/r600_hw_context.c | 2 +- src/gallium/winsys/r600/drm/r600_priv.h | 6 -- src/gallium/winsys/r600/drm/radeon_bo.c | 18 ++-- .../winsys/radeon/drm/radeon_drm_winsys.c | 32 +++++-- src/gallium/winsys/radeon/drm/radeon_winsys.h | 2 + 6 files changed, 51 insertions(+), 100 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 325547ab4ec..ab15257efb2 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -76,12 +76,12 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon) unsigned r600_get_clock_crystal_freq(struct radeon *radeon) { - return radeon->clock_crystal_freq; + return radeon->info.r600_clock_crystal_freq; } unsigned r600_get_num_backends(struct radeon *radeon) { - return radeon->num_backends; + return radeon->info.r600_num_backends; } unsigned r600_get_num_tile_pipes(struct radeon *radeon) @@ -96,7 +96,7 @@ unsigned r600_get_backend_map(struct 
radeon *radeon) unsigned r600_get_minor_version(struct radeon *radeon) { - return radeon->minor_version; + return radeon->info.drm_minor; } static int r600_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) @@ -191,59 +191,16 @@ static int eg_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) static int radeon_drm_get_tiling(struct radeon *radeon) { - struct drm_radeon_info info = {}; - int r; - uint32_t tiling_config = 0; + uint32_t tiling_config = radeon->info.r600_tiling_config; - info.request = RADEON_INFO_TILING_CONFIG; - info.value = (uintptr_t)&tiling_config; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - - if (r) + if (!tiling_config) return 0; if (radeon->chip_class == R600 || radeon->chip_class == R700) { - r = r600_interpret_tiling(radeon, tiling_config); + return r600_interpret_tiling(radeon, tiling_config); } else { - r = eg_interpret_tiling(radeon, tiling_config); + return eg_interpret_tiling(radeon, tiling_config); } - return r; -} - -static int radeon_get_clock_crystal_freq(struct radeon *radeon) -{ - struct drm_radeon_info info = {}; - uint32_t clock_crystal_freq = 0; - int r; - - info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ; - info.value = (uintptr_t)&clock_crystal_freq; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - if (r) - return r; - - radeon->clock_crystal_freq = clock_crystal_freq; - return 0; -} - - -static int radeon_get_num_backends(struct radeon *radeon) -{ - struct drm_radeon_info info = {}; - uint32_t num_backends = 0; - int r; - - info.request = RADEON_INFO_NUM_BACKENDS; - info.value = (uintptr_t)&num_backends; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - if (r) - return r; - - radeon->num_backends = num_backends; - return 0; } static int radeon_get_num_tile_pipes(struct radeon *radeon) @@ -254,7 +211,7 @@ static int radeon_get_num_tile_pipes(struct 
radeon *radeon) info.request = RADEON_INFO_NUM_TILE_PIPES; info.value = (uintptr_t)&num_tile_pipes; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_INFO, &info, sizeof(struct drm_radeon_info)); if (r) return r; @@ -271,7 +228,7 @@ static int radeon_get_backend_map(struct radeon *radeon) info.request = RADEON_INFO_BACKEND_MAP; info.value = (uintptr_t)&backend_map; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_INFO, &info, sizeof(struct drm_radeon_info)); if (r) return r; @@ -282,7 +239,6 @@ static int radeon_get_backend_map(struct radeon *radeon) return 0; } - static int radeon_init_fence(struct radeon *radeon) { radeon->fence = 1; @@ -307,7 +263,7 @@ static int handle_compare(void *key1, void *key2) return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); } -static struct radeon *radeon_new(struct radeon_winsys *rw) +struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw) { struct radeon *radeon; int r; @@ -318,15 +274,10 @@ static struct radeon *radeon_new(struct radeon_winsys *rw) } rw->query_info(rw, &radeon->info); - radeon->fd = radeon->info.fd; - radeon->device = radeon->info.pci_id; - radeon->num_backends = radeon->info.r600_num_backends; - radeon->refcount = 1; - radeon->minor_version = radeon->info.drm_minor; - radeon->family = radeon_family_from_device(radeon->device); + radeon->family = radeon_family_from_device(radeon->info.pci_id); if (radeon->family == CHIP_UNKNOWN) { - fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device); + fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->info.pci_id); return radeon_decref(radeon); } /* setup class */ @@ -373,20 +324,14 @@ static struct radeon *radeon_new(struct radeon_winsys *rw) break; default: fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n", - __func__, radeon->device); + __func__, radeon->info.pci_id); break; } if (radeon_drm_get_tiling(radeon)) return 
NULL; - /* get the GPU counter frequency, failure is non fatal */ - radeon_get_clock_crystal_freq(radeon); - - if (radeon->minor_version >= 9) - radeon_get_num_backends(radeon); - - if (radeon->minor_version >= 11) { + if (radeon->info.drm_minor >= 11) { radeon_get_num_tile_pipes(radeon); radeon_get_backend_map(radeon); } @@ -406,18 +351,10 @@ static struct radeon *radeon_new(struct radeon_winsys *rw) return radeon; } -struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw) -{ - return radeon_new(rw); -} - struct radeon *radeon_decref(struct radeon *radeon) { if (radeon == NULL) return NULL; - if (--radeon->refcount > 0) { - return NULL; - } util_hash_table_destroy(radeon->bo_handles); pipe_mutex_destroy(radeon->bo_handles_mutex); diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 30af4e8066f..46ca4ed907a 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1621,7 +1621,7 @@ void r600_context_flush(struct r600_context *ctx) chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc; chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0]; chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1]; - r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, + r = drmCommandWriteRead(ctx->radeon->info.fd, DRM_RADEON_CS, &drmib, sizeof(struct drm_radeon_cs)); if (r) { fprintf(stderr, "radeon: The kernel rejected CS, " diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 7630b30b5f0..9fc7c534646 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -45,9 +45,6 @@ struct r600_bo; struct radeon { struct radeon_info info; - int fd; - int refcount; - unsigned device; unsigned family; enum chip_class chip_class; struct r600_tiling_info tiling_info; @@ -55,12 +52,9 @@ struct radeon { unsigned fence; unsigned *cfence; struct r600_bo *fence_bo; - unsigned 
clock_crystal_freq; - unsigned num_backends; unsigned num_tile_pipes; unsigned backend_map; boolean backend_map_valid; - unsigned minor_version; /* List of buffer handles and its mutex. */ struct util_hash_table *bo_handles; diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 45cf6f09671..34696da515b 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -44,14 +44,14 @@ int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo) args.handle = bo->handle; args.offset = 0; args.size = (uint64_t)bo->size; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_MMAP, + r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_MMAP, &args, sizeof(args)); if (r) { fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n", bo, bo->handle, r); return r; } - ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->fd, args.addr_ptr); + ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->info.fd, args.addr_ptr); if (ptr == MAP_FAILED) { fprintf(stderr, "%s failed to map bo\n", __func__); return -errno; @@ -101,7 +101,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, memset(&open_arg, 0, sizeof(open_arg)); open_arg.name = handle; - r = drmIoctl(radeon->fd, DRM_IOCTL_GEM_OPEN, &open_arg); + r = drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_OPEN, &open_arg); if (r != 0) { free(bo); return NULL; @@ -118,7 +118,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, args.initial_domain = initial_domain; args.flags = 0; args.handle = 0; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_CREATE, + r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_CREATE, &args, sizeof(args)); bo->handle = args.handle; if (r) { @@ -153,7 +153,7 @@ static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo) radeon_bo_fixed_unmap(radeon, bo); memset(&args, 0, sizeof(args)); args.handle = bo->handle; - drmIoctl(radeon->fd, 
DRM_IOCTL_GEM_CLOSE, &args); + drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_CLOSE, &args); memset(bo, 0, sizeof(struct radeon_bo)); free(bo); } @@ -188,7 +188,7 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) memset(&args, 0, sizeof(args)); args.handle = bo->handle; do { - ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_WAIT_IDLE, + ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_WAIT_IDLE, &args, sizeof(args)); } while (ret == -EBUSY); return ret; @@ -213,7 +213,7 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain args.handle = bo->handle; args.domain = 0; - ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY, + ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args)); *domain = args.domain; @@ -229,7 +229,7 @@ int radeon_bo_get_tiling_flags(struct radeon *radeon, int ret; args.handle = bo->handle; - ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_GET_TILING, + ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_GET_TILING, &args, sizeof(args)); if (ret) return ret; @@ -247,7 +247,7 @@ int radeon_bo_get_name(struct radeon *radeon, int ret; flink.handle = bo->handle; - ret = drmIoctl(radeon->fd, DRM_IOCTL_GEM_FLINK, &flink); + ret = drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_FLINK, &flink); if (ret) return ret; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index faeb66c8908..3be6e34f6f0 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -41,12 +41,22 @@ #include #include +#ifndef RADEON_INFO_TILING_CONFIG +#define RADEON_INFO_TILING_CONFIG 6 +#endif + #ifndef RADEON_INFO_WANT_HYPERZ #define RADEON_INFO_WANT_HYPERZ 7 #endif + #ifndef RADEON_INFO_WANT_CMASK #define RADEON_INFO_WANT_CMASK 8 #endif + +#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ +#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 9 +#endif + #ifndef RADEON_INFO_NUM_BACKENDS 
#define RADEON_INFO_NUM_BACKENDS 10 #endif @@ -107,7 +117,7 @@ static boolean radeon_set_fd_access(struct radeon_drm_cs *applier, } static boolean radeon_get_drm_value(int fd, unsigned request, - const char *name, uint32_t *out) + const char *errname, uint32_t *out) { struct drm_radeon_info info = {0}; int retval; @@ -116,9 +126,9 @@ static boolean radeon_get_drm_value(int fd, unsigned request, info.request = request; retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get %s, error number %d\n", - __func__, name, retval); + if (retval && errname) { + fprintf(stderr, "radeon: Failed to get %s, error number %d\n", + errname, retval); return FALSE; } return TRUE; @@ -196,8 +206,8 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); if (retval) { - fprintf(stderr, "%s: Failed to get MM info, error number %d\n", - __FUNCTION__, retval); + fprintf(stderr, "radeon: Failed to get MM info, error number %d\n", + retval); return FALSE; } ws->info.gart_size = gem_info.gart_size; @@ -218,10 +228,18 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) return FALSE; } else if (ws->gen == R600) { - if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, + if (ws->info.drm_minor >= 9 && + !radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, "num backends", &ws->info.r600_num_backends)) return FALSE; + + /* get the GPU counter frequency, failure is not fatal */ + radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL, + &ws->info.r600_clock_crystal_freq); + + radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL, + &ws->info.r600_tiling_config); } return TRUE; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 2a49e615981..2948ea78c18 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ 
b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -83,6 +83,8 @@ struct radeon_info { uint32_t r300_num_z_pipes; uint32_t r600_num_backends; + uint32_t r600_clock_crystal_freq; + uint32_t r600_tiling_config; }; enum radeon_feature_id { From 354f76f386afd980e8c1564c0b0502f9768007b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Jul 2011 21:38:56 +0200 Subject: [PATCH 309/600] r600g: cleanup includes in winsys Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 8 ++---- src/gallium/drivers/r600/r600_asm.h | 2 -- .../winsys/r600/drm/evergreen_hw_context.c | 17 ++++-------- src/gallium/winsys/r600/drm/r600_bo.c | 6 +---- src/gallium/winsys/r600/drm/r600_bomgr.c | 6 ++--- src/gallium/winsys/r600/drm/r600_drm.c | 27 +++---------------- src/gallium/winsys/r600/drm/r600_hw_context.c | 15 +++-------- src/gallium/winsys/r600/drm/r600_priv.h | 10 ++----- src/gallium/winsys/r600/drm/radeon_bo.c | 10 +++---- src/gallium/winsys/r600/drm/radeon_pciid.c | 1 - 10 files changed, 24 insertions(+), 78 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 2ac5ed465c1..714af0c112d 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -26,12 +26,8 @@ #ifndef R600_H #define R600_H -#include -#include -#include -#include -#include -#include +#include "util/u_double_list.h" +#include "util/u_inlines.h" #define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index cbdaacf7178..5dec95acf1d 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -23,8 +23,6 @@ #ifndef R600_ASM_H #define R600_ASM_H -#include "util/u_double_list.h" - struct r600_vertex_element; struct r600_pipe_context; diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 7fe2050cd84..412533e44bc 100644 --- 
a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -23,19 +23,13 @@ * Authors: * Jerome Glisse */ -#include -#include -#include -#include -#include -#include "xf86drm.h" #include "r600.h" -#include "evergreend.h" -#include "radeon_drm.h" -#include "pipe/p_compiler.h" -#include "util/u_inlines.h" -#include "util/u_memory.h" #include "r600_priv.h" +#include "evergreend.h" +#include "util/u_memory.h" +#include "radeon_drm.h" +#include "xf86drm.h" +#include #define GROUP_FORCE_NEW_BLOCK 0 @@ -1271,4 +1265,3 @@ void evergreen_context_flush_dest_caches(struct r600_context *ctx) ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; } - diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 0f5b063cf5a..f6e15630d71 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -23,13 +23,9 @@ * Authors: * Dave Airlie */ -#include -#include -#include -#include "state_tracker/drm_driver.h" #include "r600_priv.h" #include "r600d.h" -#include "drm.h" +#include "state_tracker/drm_driver.h" #include "radeon_drm.h" struct r600_bo *r600_bo(struct radeon *radeon, diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c index 4918d5eb0b1..5cea7654d97 100644 --- a/src/gallium/winsys/r600/drm/r600_bomgr.c +++ b/src/gallium/winsys/r600/drm/r600_bomgr.c @@ -26,11 +26,9 @@ * Thomas Hellström * Jerome Glisse */ -#include -#include -#include -#include #include "r600_priv.h" +#include "util/u_memory.h" +#include "util/u_time.h" static void r600_bomgr_timeout_flush(struct r600_bomgr *mgr) { diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index ab15257efb2..3a2186c6f4a 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -25,31 +25,12 @@ * Corbin Simpson * Joakim Sindholt */ -#include -#include -#include -#include "util/u_inlines.h" 
-#include "util/u_debug.h" -#include "util/u_hash_table.h" -#include -#include "r600.h" + #include "r600_priv.h" #include "r600_drm_public.h" -#include "xf86drm.h" -#include "radeon_drm.h" -#include "../../radeon/drm/radeon_winsys.h" - -#ifndef RADEON_INFO_TILING_CONFIG -#define RADEON_INFO_TILING_CONFIG 0x6 -#endif - -#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ -#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x9 -#endif - -#ifndef RADEON_INFO_NUM_BACKENDS -#define RADEON_INFO_NUM_BACKENDS 0xa -#endif +#include +#include +#include #ifndef RADEON_INFO_NUM_TILE_PIPES #define RADEON_INFO_NUM_TILE_PIPES 0xb diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 46ca4ed907a..60de24fa67f 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -23,19 +23,12 @@ * Authors: * Jerome Glisse */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "xf86drm.h" -#include "radeon_drm.h" #include "r600_priv.h" #include "r600d.h" +#include "util/u_memory.h" +#include "radeon_drm.h" +#include "xf86drm.h" +#include #define GROUP_FORCE_NEW_BLOCK 0 diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 9fc7c534646..08a3552803c 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -26,16 +26,10 @@ #ifndef R600_PRIV_H #define R600_PRIV_H -#include -#include -#include -#include -#include -#include -#include "util/u_hash_table.h" -#include #include "r600.h" #include "../../radeon/drm/radeon_winsys.h" +#include "util/u_hash_table.h" +#include "os/os_thread.h" #define PKT_COUNT_C 0xC000FFFF #define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 34696da515b..80336de4537 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ 
b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -24,14 +24,12 @@ * Jerome Glisse */ #define _FILE_OFFSET_BITS 64 -#include -#include -#include +#include "r600_priv.h" +#include "util/u_hash_table.h" +#include "radeon_drm.h" +#include "xf86drm.h" #include #include -#include "r600_priv.h" -#include "xf86drm.h" -#include "radeon_drm.h" int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo) { diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c index f54a7c8fe72..87572417c80 100644 --- a/src/gallium/winsys/r600/drm/radeon_pciid.c +++ b/src/gallium/winsys/r600/drm/radeon_pciid.c @@ -23,7 +23,6 @@ * Authors: * Jerome Glisse */ -#include #include "r600_priv.h" struct pci_id { From 2ce783d8ddec1b1fcadc0798af0ebb045bba1cc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 2 Aug 2011 20:25:13 +0200 Subject: [PATCH 310/600] r600g: put radeon_winsys in screen::winsys, don't include drm_driver in the pipe Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 7 +++--- src/gallium/drivers/r600/r600_buffer.c | 23 ++++++++++--------- src/gallium/drivers/r600/r600_pipe.c | 10 ++++---- src/gallium/drivers/r600/r600_pipe.h | 1 + src/gallium/drivers/r600/r600_public.h | 2 +- src/gallium/drivers/r600/r600_texture.c | 22 ++++++++---------- .../winsys/r600/drm/evergreen_hw_context.c | 1 + src/gallium/winsys/r600/drm/r600_bo.c | 11 +++++---- src/gallium/winsys/r600/drm/r600_drm.c | 18 +++++++-------- src/gallium/winsys/r600/drm/r600_drm_public.h | 2 +- src/gallium/winsys/r600/drm/r600_hw_context.c | 1 + src/gallium/winsys/r600/drm/r600_priv.h | 1 + 12 files changed, 54 insertions(+), 45 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 714af0c112d..7e704730122 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -95,11 +95,12 @@ unsigned r600_get_backend_map(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; + 
struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned binding, unsigned usage); -struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode); +struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle, + unsigned *stride, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo, @@ -315,7 +316,7 @@ void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, stru void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); -struct radeon *radeon_decref(struct radeon *radeon); +struct radeon *radeon_destroy(struct radeon *radeon); void _r600_pipe_state_add_reg(struct r600_context *ctx, struct r600_pipe_state *state, diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 72f352df3c3..28d8c6af1cb 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -33,8 +33,6 @@ #include #include "util/u_upload_mgr.h" -#include "state_tracker/drm_driver.h" - #include #include "radeon_drm.h" @@ -48,7 +46,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, struct r600_resource_buffer *rbuffer = r600_buffer(buf); if (rbuffer->r.bo) { - r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); + r600_bo_reference(rscreen->radeon, &rbuffer->r.bo, NULL); } rbuffer->r.bo = NULL; util_slab_free(&rscreen->pool_buffers, rbuffer); @@ -81,12 +79,13 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_resource_buffer *rbuffer = 
r600_buffer(transfer->resource); + struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe; uint8_t *data; if (rbuffer->r.b.user_ptr) return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x; - data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe); + data = r600_bo_map(rctx->screen->radeon, rbuffer->r.bo, transfer->usage, pipe); if (!data) return NULL; @@ -97,12 +96,13 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe; if (rbuffer->r.b.user_ptr) return; if (rbuffer->r.bo) - r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); + r600_bo_unmap(rctx->screen->radeon, rbuffer->r.bo); } static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, @@ -127,20 +127,21 @@ static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, unsigned stride, unsigned layer_stride) { - struct radeon *ws = (struct radeon*)pipe->winsys; + struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe; + struct radeon *radeon = rctx->screen->radeon; struct r600_resource_buffer *rbuffer = r600_buffer(resource); uint8_t *map = NULL; assert(rbuffer->r.b.user_ptr == NULL); - map = r600_bo_map(ws, rbuffer->r.bo, + map = r600_bo_map(radeon, rbuffer->r.bo, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage, pipe); memcpy(map + box->x, data, box->width); if (rbuffer->r.bo) - r600_bo_unmap(ws, rbuffer->r.bo); + r600_bo_unmap(radeon, rbuffer->r.bo); } static const struct u_resource_vtbl r600_buffer_vtbl = @@ -175,7 +176,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, rbuffer->r.size = rbuffer->r.b.b.b.width0; rbuffer->r.bo_size = rbuffer->r.size; - bo = r600_bo((struct radeon*)screen->winsys, + bo = r600_bo(rscreen->radeon, rbuffer->r.b.b.b.width0, alignment, rbuffer->r.b.b.b.bind, rbuffer->r.b.b.b.usage); @@ 
-219,11 +220,11 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle) { - struct radeon *rw = (struct radeon*)screen->winsys; + struct radeon *rw = ((struct r600_screen*)screen)->radeon; struct r600_resource *rbuffer; struct r600_bo *bo = NULL; - bo = r600_bo_handle(rw, whandle->handle, NULL); + bo = r600_bo_handle(rw, whandle, NULL, NULL); if (bo == NULL) { return NULL; } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 6181e8b3202..8a18207d1ea 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -500,7 +500,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) if (rscreen == NULL) return; - radeon_decref(rscreen->radeon); + radeon_destroy(rscreen->radeon); + rscreen->ws->destroy(rscreen->ws); util_slab_destroy(&rscreen->pool_buffers); pipe_mutex_destroy(rscreen->mutex_num_contexts); @@ -564,18 +565,19 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen, return TRUE; } -struct pipe_screen *r600_screen_create(struct radeon_winsys *rw) +struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) { struct r600_screen *rscreen; - struct radeon *radeon = r600_drm_winsys_create(rw); + struct radeon *radeon = radeon_create(ws); rscreen = CALLOC_STRUCT(r600_screen); if (rscreen == NULL) { return NULL; } + rscreen->ws = ws; rscreen->radeon = radeon; - rscreen->screen.winsys = (struct pipe_winsys*)radeon; + rscreen->screen.winsys = (struct pipe_winsys*)ws; rscreen->screen.destroy = r600_destroy_screen; rscreen->screen.get_name = r600_get_name; rscreen->screen.get_vendor = r600_get_vendor; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index c53a191594b..3ca003aa244 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -74,6 +74,7 @@ enum 
r600_pipe_state_id { struct r600_screen { struct pipe_screen screen; + struct radeon_winsys *ws; struct radeon *radeon; struct r600_tiling_info *tiling_info; struct util_slab_mempool pool_buffers; diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h index 1c82a7af68f..e4fe23a87b7 100644 --- a/src/gallium/drivers/r600/r600_public.h +++ b/src/gallium/drivers/r600/r600_public.h @@ -25,6 +25,6 @@ struct radeon_winsys; -struct pipe_screen *r600_screen_create(struct radeon_winsys *rw); +struct pipe_screen *r600_screen_create(struct radeon_winsys *ws); #endif diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 927eb5dafc9..386d8f35015 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -31,7 +31,6 @@ #include #include #include -#include "state_tracker/drm_driver.h" #include "pipebuffer/pb_buffer.h" #include "r600_pipe.h" #include "r600_resource.h" @@ -239,7 +238,7 @@ static void r600_setup_miptree(struct pipe_screen *screen, unsigned array_mode) { struct pipe_resource *ptex = &rtex->resource.b.b.b; - struct radeon *radeon = (struct radeon *)screen->winsys; + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; enum chip_class chipc = r600_get_family_class(radeon); unsigned size, layer_size, i, offset; unsigned nblocksx, nblocksy, extra_size = 0; @@ -329,7 +328,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; return r600_bo_get_winsys_handle(radeon, resource->bo, rtex->pitch_in_bytes[0], whandle); @@ -340,7 +339,7 @@ static void r600_texture_destroy(struct pipe_screen *screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; struct 
r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; if (rtex->flushed_depth_texture) pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); @@ -373,7 +372,7 @@ r600_texture_create_object(struct pipe_screen *screen, { struct r600_resource_texture *rtex; struct r600_resource *resource; - struct radeon *radeon = (struct radeon *)screen->winsys; + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; rtex = CALLOC_STRUCT(r600_resource_texture); if (rtex == NULL) @@ -483,8 +482,9 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle) { - struct radeon *rw = (struct radeon*)screen->winsys; + struct radeon *rw = ((struct r600_screen*)screen)->radeon; struct r600_bo *bo = NULL; + unsigned stride = 0; unsigned array_mode = 0; /* Support only 2D textures without mipmaps */ @@ -492,15 +492,13 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, templ->depth0 != 1 || templ->last_level != 0) return NULL; - bo = r600_bo_handle(rw, whandle->handle, &array_mode); + bo = r600_bo_handle(rw, whandle, &stride, &array_mode); if (bo == NULL) { return NULL; } return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, - whandle->stride, - 0, - bo); + stride, 0, bo); } int r600_texture_depth_flush(struct pipe_context *ctx, @@ -687,7 +685,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct r600_bo *bo; enum pipe_format format = transfer->resource->format; - struct radeon *radeon = (struct radeon *)ctx->screen->winsys; + struct radeon *radeon = ((struct r600_screen*)ctx->screen)->radeon; unsigned offset = 0; char *map; @@ -717,7 +715,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* 
transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct radeon *radeon = (struct radeon *)ctx->screen->winsys; + struct radeon *radeon = ((struct r600_screen*)ctx->screen)->radeon; struct r600_bo *bo; if (rtransfer->staging_texture) { diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 412533e44bc..94206d5568b 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -902,6 +902,7 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) memset(ctx, 0, sizeof(struct r600_context)); ctx->radeon = radeon; + LIST_INITHEAD(&ctx->query_list); /* init dirty list */ diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index f6e15630d71..bbd52a1fdde 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -86,23 +86,26 @@ struct r600_bo *r600_bo(struct radeon *radeon, return bo; } -struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode) +struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle, + unsigned *stride, unsigned *array_mode) { struct r600_bo *bo = calloc(1, sizeof(struct r600_bo)); struct radeon_bo *rbo; - rbo = bo->bo = radeon_bo(radeon, handle, 0, 0, 0); + rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0); if (rbo == NULL) { free(bo); return NULL; } + + pipe_reference_init(&bo->reference, 1); bo->size = rbo->size; bo->domains = (RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); - pipe_reference_init(&bo->reference, 1); + if (stride) + *stride = whandle->stride; radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags, &bo->kernel_pitch); if (array_mode) { diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 3a2186c6f4a..1cf905f2575 100644 --- 
a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -28,6 +28,7 @@ #include "r600_priv.h" #include "r600_drm_public.h" +#include "util/u_memory.h" #include #include #include @@ -244,22 +245,21 @@ static int handle_compare(void *key1, void *key2) return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); } -struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw) +struct radeon *radeon_create(struct radeon_winsys *ws) { - struct radeon *radeon; int r; - - radeon = calloc(1, sizeof(*radeon)); + struct radeon *radeon = CALLOC_STRUCT(radeon); if (radeon == NULL) { return NULL; } - rw->query_info(rw, &radeon->info); + radeon->ws = ws; + ws->query_info(ws, &radeon->info); radeon->family = radeon_family_from_device(radeon->info.pci_id); if (radeon->family == CHIP_UNKNOWN) { fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->info.pci_id); - return radeon_decref(radeon); + return radeon_destroy(radeon); } /* setup class */ switch (radeon->family) { @@ -323,7 +323,7 @@ struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw) } r = radeon_init_fence(radeon); if (r) { - radeon_decref(radeon); + radeon_destroy(radeon); return NULL; } @@ -332,7 +332,7 @@ struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw) return radeon; } -struct radeon *radeon_decref(struct radeon *radeon) +struct radeon *radeon_destroy(struct radeon *radeon) { if (radeon == NULL) return NULL; @@ -346,6 +346,6 @@ struct radeon *radeon_decref(struct radeon *radeon) if (radeon->bomgr) r600_bomgr_destroy(radeon->bomgr); - free(radeon); + FREE(radeon); return NULL; } diff --git a/src/gallium/winsys/r600/drm/r600_drm_public.h b/src/gallium/winsys/r600/drm/r600_drm_public.h index 1d990f91013..b8a37c7574f 100644 --- a/src/gallium/winsys/r600/drm/r600_drm_public.h +++ b/src/gallium/winsys/r600/drm/r600_drm_public.h @@ -28,6 +28,6 @@ struct radeon_winsys; -struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw); +struct radeon *radeon_create(struct 
radeon_winsys *ws); #endif diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 60de24fa67f..8ab4d94a6ee 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -874,6 +874,7 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) memset(ctx, 0, sizeof(struct r600_context)); ctx->radeon = radeon; + LIST_INITHEAD(&ctx->query_list); /* init dirty list */ diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 08a3552803c..8296aa1bdd1 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -38,6 +38,7 @@ struct r600_bomgr; struct r600_bo; struct radeon { + struct radeon_winsys *ws; struct radeon_info info; unsigned family; enum chip_class chip_class; From c092e236cc049cc56e9d0d337defc96729cf5830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 2 Aug 2011 21:18:10 +0200 Subject: [PATCH 311/600] r600g: remove unused r600_bo::kernel_pitch Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_bo.c | 2 +- src/gallium/winsys/r600/drm/r600_priv.h | 4 +--- src/gallium/winsys/r600/drm/radeon_bo.c | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index bbd52a1fdde..f74e0a498cd 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -107,7 +107,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan if (stride) *stride = whandle->stride; - radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags, &bo->kernel_pitch); + radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags); if (array_mode) { if (bo->tiling_flags) { if (bo->tiling_flags & RADEON_TILING_MACRO) diff --git a/src/gallium/winsys/r600/drm/r600_priv.h 
b/src/gallium/winsys/r600/drm/r600_priv.h index 8296aa1bdd1..50783a61069 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -96,7 +96,6 @@ struct r600_bo { /* DO NOT MOVE THIS ^ */ unsigned size; unsigned tiling_flags; - unsigned kernel_pitch; unsigned domains; struct radeon_bo *bo; unsigned fence; @@ -140,8 +139,7 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo); int radeon_bo_get_tiling_flags(struct radeon *radeon, struct radeon_bo *bo, - uint32_t *tiling_flags, - uint32_t *pitch); + uint32_t *tiling_flags); int radeon_bo_get_name(struct radeon *radeon, struct radeon_bo *bo, uint32_t *name); diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 80336de4537..4e61c942a26 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -220,8 +220,7 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain int radeon_bo_get_tiling_flags(struct radeon *radeon, struct radeon_bo *bo, - uint32_t *tiling_flags, - uint32_t *pitch) + uint32_t *tiling_flags) { struct drm_radeon_gem_get_tiling args = {}; int ret; @@ -233,7 +232,6 @@ int radeon_bo_get_tiling_flags(struct radeon *radeon, return ret; *tiling_flags = args.tiling_flags; - *pitch = args.pitch; return ret; } From 39db886548b9e93a6a91bf76095848af44972e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 2 Aug 2011 23:03:11 +0200 Subject: [PATCH 312/600] r600g: remove unused r600_bo::tiling_flags Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_bo.c | 9 +++++---- src/gallium/winsys/r600/drm/r600_priv.h | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index f74e0a498cd..9e6c7cc7639 100644 --- 
a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -91,6 +91,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan { struct r600_bo *bo = calloc(1, sizeof(struct r600_bo)); struct radeon_bo *rbo; + unsigned tiling_flags; rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0); if (rbo == NULL) { @@ -107,12 +108,12 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan if (stride) *stride = whandle->stride; - radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags); + radeon_bo_get_tiling_flags(radeon, rbo, &tiling_flags); if (array_mode) { - if (bo->tiling_flags) { - if (bo->tiling_flags & RADEON_TILING_MACRO) + if (tiling_flags) { + if (tiling_flags & RADEON_TILING_MACRO) *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; - else if (bo->tiling_flags & RADEON_TILING_MICRO) + else if (tiling_flags & RADEON_TILING_MICRO) *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; } else { *array_mode = 0; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 50783a61069..c310defe2b1 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -95,7 +95,6 @@ struct r600_bo { struct pipe_reference reference; /* this must be the first member for the r600_bo_reference inline to work */ /* DO NOT MOVE THIS ^ */ unsigned size; - unsigned tiling_flags; unsigned domains; struct radeon_bo *bo; unsigned fence; From edca57e53318121a7bd909c0c36dcffe5d9ac15f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 2 Aug 2011 23:58:57 +0200 Subject: [PATCH 313/600] r600g: remove unused function declarations Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_priv.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index c310defe2b1..ee3df9c93f9 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h 
+++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -115,12 +115,6 @@ struct r600_bomgr { unsigned num_delayed; }; -/* - * r600_drm.c - */ -struct radeon *r600_new(int fd, unsigned device); -void r600_delete(struct radeon *r600); - /* * radeon_pciid.c */ From 11daa7e325cc2653269d3c01844ff77fdd515de9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 01:03:13 +0200 Subject: [PATCH 314/600] r600g: allocate/destroy buffers using radeon_winsys We use the cache buffer manager from radeon_winsys now, but we don't use anything else yet. Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_bo.c | 4 +- src/gallium/winsys/r600/drm/r600_drm.c | 16 ---- src/gallium/winsys/r600/drm/r600_priv.h | 10 +-- src/gallium/winsys/r600/drm/radeon_bo.c | 77 +++++-------------- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 7 ++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 4 + 6 files changed, 35 insertions(+), 83 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 9e6c7cc7639..8cf1d809235 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -68,7 +68,7 @@ struct r600_bo *r600_bo(struct radeon *radeon, initial_domain = RADEON_GEM_DOMAIN_VRAM; break; } - rbo = radeon_bo(radeon, 0, size, alignment, initial_domain); + rbo = radeon_bo(radeon, 0, size, alignment, binding, initial_domain); if (rbo == NULL) { return NULL; } @@ -93,7 +93,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan struct radeon_bo *rbo; unsigned tiling_flags; - rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0); + rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0, 0); if (rbo == NULL) { free(bo); return NULL; diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 1cf905f2575..f2e46d3d0d9 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ 
b/src/gallium/winsys/r600/drm/r600_drm.c @@ -233,18 +233,6 @@ static int radeon_init_fence(struct radeon *radeon) return 0; } -#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) - -static unsigned handle_hash(void *key) -{ - return PTR_TO_UINT(key); -} - -static int handle_compare(void *key1, void *key2) -{ - return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); -} - struct radeon *radeon_create(struct radeon_winsys *ws) { int r; @@ -327,8 +315,6 @@ struct radeon *radeon_create(struct radeon_winsys *ws) return NULL; } - radeon->bo_handles = util_hash_table_create(handle_hash, handle_compare); - pipe_mutex_init(radeon->bo_handles_mutex); return radeon; } @@ -337,8 +323,6 @@ struct radeon *radeon_destroy(struct radeon *radeon) if (radeon == NULL) return NULL; - util_hash_table_destroy(radeon->bo_handles); - pipe_mutex_destroy(radeon->bo_handles_mutex); if (radeon->fence_bo) { r600_bo_reference(radeon, &radeon->fence_bo, NULL); } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index ee3df9c93f9..621c7eb2158 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -50,10 +50,6 @@ struct radeon { unsigned num_tile_pipes; unsigned backend_map; boolean backend_map_valid; - - /* List of buffer handles and its mutex. 
*/ - struct util_hash_table *bo_handles; - pipe_mutex bo_handles_mutex; }; /* these flags are used in register flags and added into block flags */ @@ -75,9 +71,10 @@ struct r600_reg { #define BO_BOUND_TEXTURE 1 struct radeon_bo { struct pipe_reference reference; + struct pb_buffer *buf; + unsigned handle; unsigned size; - unsigned alignment; int map_count; void *data; struct list_head fencedlist; @@ -87,7 +84,6 @@ struct radeon_bo { struct r600_reloc *reloc; unsigned reloc_id; unsigned last_flush; - unsigned name; unsigned binding; }; @@ -124,7 +120,7 @@ unsigned radeon_family_from_device(unsigned device); * radeon_bo.c */ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, unsigned initial_domain); + unsigned size, unsigned alignment, unsigned bind, unsigned initial_domain); void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst, struct radeon_bo *src); int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 4e61c942a26..7ff4806c056 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -26,6 +26,7 @@ #define _FILE_OFFSET_BITS 64 #include "r600_priv.h" #include "util/u_hash_table.h" +#include "util/u_memory.h" #include "radeon_drm.h" #include "xf86drm.h" #include @@ -68,22 +69,16 @@ static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo) } } +#include "state_tracker/drm_driver.h" + struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, unsigned initial_domain) + unsigned size, unsigned alignment, unsigned bind, + unsigned initial_domain) { struct radeon_bo *bo; - int r; + struct winsys_handle whandle = {}; + whandle.handle = handle; - if (handle) { - pipe_mutex_lock(radeon->bo_handles_mutex); - bo = util_hash_table_get(radeon->bo_handles, - (void *)(uintptr_t)handle); - 
if (bo) { - struct radeon_bo *b = NULL; - radeon_bo_reference(radeon, &b, bo); - goto done; - } - } bo = calloc(1, sizeof(*bo)); if (bo == NULL) { return NULL; @@ -91,69 +86,35 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, bo->size = size; bo->handle = handle; pipe_reference_init(&bo->reference, 1); - bo->alignment = alignment; LIST_INITHEAD(&bo->fencedlist); if (handle) { - struct drm_gem_open open_arg; - - memset(&open_arg, 0, sizeof(open_arg)); - open_arg.name = handle; - r = drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_OPEN, &open_arg); - if (r != 0) { - free(bo); + unsigned size; + bo->buf = radeon->ws->buffer_from_handle(radeon->ws, &whandle, NULL, &size); + if (!bo->buf) { + FREE(bo); return NULL; } - bo->name = handle; - bo->handle = open_arg.handle; - bo->size = open_arg.size; + bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf); + bo->size = size; bo->shared = TRUE; } else { - struct drm_radeon_gem_create args = {}; - - args.size = size; - args.alignment = alignment; - args.initial_domain = initial_domain; - args.flags = 0; - args.handle = 0; - r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_CREATE, - &args, sizeof(args)); - bo->handle = args.handle; - if (r) { - fprintf(stderr, "Failed to allocate :\n"); - fprintf(stderr, " size : %d bytes\n", size); - fprintf(stderr, " alignment : %d bytes\n", alignment); - free(bo); + bo->buf = radeon->ws->buffer_create(radeon->ws, size, alignment, bind, initial_domain); + if (!bo->buf) { + FREE(bo); return NULL; } + bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf); } - - if (handle) - util_hash_table_set(radeon->bo_handles, (void *)(uintptr_t)handle, bo); -done: - if (handle) - pipe_mutex_unlock(radeon->bo_handles_mutex); - return bo; } static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo) { - struct drm_gem_close args; - - if (bo->name) { - pipe_mutex_lock(radeon->bo_handles_mutex); - util_hash_table_remove(radeon->bo_handles, - (void 
*)(uintptr_t)bo->name); - pipe_mutex_unlock(radeon->bo_handles_mutex); - } LIST_DEL(&bo->fencedlist); radeon_bo_fixed_unmap(radeon, bo); - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_CLOSE, &args); - memset(bo, 0, sizeof(struct radeon_bo)); - free(bo); + pb_reference(&bo->buf, NULL); + FREE(bo); } void radeon_bo_reference(struct radeon *radeon, diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 609a9065db8..58898d3423e 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -617,6 +617,11 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer, return TRUE; } +static unsigned trans_get_buffer_handle(struct pb_buffer *buf) +{ + return get_radeon_bo(buf)->handle; +} + void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) { ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; @@ -629,4 +634,6 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) ws->base.buffer_create = radeon_winsys_bo_create; ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; + + ws->base.trans_get_buffer_handle = trans_get_buffer_handle; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 2948ea78c18..2e5000702e0 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -325,6 +325,10 @@ struct radeon_winsys { boolean (*cs_request_feature)(struct radeon_winsys_cs *cs, enum radeon_feature_id fid, boolean enable); + + + /* Transitional functions for r600g when moving to winsys/radeon */ + unsigned (*trans_get_buffer_handle)(struct pb_buffer *buf); }; #endif From 60ff68ad00b821eb04ebb5e0d83620421ee16deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 01:34:39 
+0200 Subject: [PATCH 315/600] r600g: remove the cache buffer manager from winsys/r600 As we've just started using the one from winsys/radeon. Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/Makefile | 3 +- src/gallium/winsys/r600/drm/SConscript | 1 - src/gallium/winsys/r600/drm/r600_bo.c | 22 +--- src/gallium/winsys/r600/drm/r600_bomgr.c | 159 ----------------------- src/gallium/winsys/r600/drm/r600_drm.c | 7 - src/gallium/winsys/r600/drm/r600_priv.h | 34 ----- 6 files changed, 3 insertions(+), 223 deletions(-) delete mode 100644 src/gallium/winsys/r600/drm/r600_bomgr.c diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile index 1d0de31c65a..e5b58d6cf87 100644 --- a/src/gallium/winsys/r600/drm/Makefile +++ b/src/gallium/winsys/r600/drm/Makefile @@ -10,8 +10,7 @@ C_SOURCES = \ radeon_pciid.c \ r600_bo.c \ r600_drm.c \ - r600_hw_context.c \ - r600_bomgr.c + r600_hw_context.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \ -I$(TOP)/include \ diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript index efcedc6bff9..3665b6eaeef 100644 --- a/src/gallium/winsys/r600/drm/SConscript +++ b/src/gallium/winsys/r600/drm/SConscript @@ -9,7 +9,6 @@ r600_sources = [ 'r600_bo.c', 'r600_drm.c', 'r600_hw_context.c', - 'r600_bomgr.c', ] env.PkgUseModules('DRM_RADEON') diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 8cf1d809235..4918897be67 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -47,14 +47,6 @@ struct r600_bo *r600_bo(struct radeon *radeon, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); - if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) { - bo = r600_bomgr_bo_create(radeon->bomgr, size, alignment, *radeon->cfence); - if (bo) { - bo->domains = domains; - return bo; - } - } - switch(usage) { case PIPE_USAGE_DYNAMIC: case PIPE_USAGE_STREAM: @@ 
-75,12 +67,8 @@ struct r600_bo *r600_bo(struct radeon *radeon, bo = calloc(1, sizeof(struct r600_bo)); bo->size = size; - bo->alignment = alignment; bo->domains = domains; bo->bo = rbo; - if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) { - r600_bomgr_bo_init(radeon->bomgr, bo); - } pipe_reference_init(&bo->reference, 1); return bo; @@ -128,7 +116,7 @@ void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, voi if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { radeon_bo_map(radeon, bo->bo); - return (uint8_t *) bo->bo->data + bo->offset; + return (uint8_t *) bo->bo->data; } if (p_atomic_read(&bo->bo->reference.count) > 1) { @@ -158,7 +146,7 @@ void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, voi } out: - return (uint8_t *) bo->bo->data + bo->offset; + return (uint8_t *) bo->bo->data; } void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo) @@ -168,12 +156,6 @@ void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo) void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) { - if (bo->manager_id) { - if (!r600_bomgr_bo_destroy(radeon->bomgr, bo)) { - /* destroy is delayed by buffer manager */ - return; - } - } radeon_bo_reference(radeon, &bo->bo, NULL); free(bo); } diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c deleted file mode 100644 index 5cea7654d97..00000000000 --- a/src/gallium/winsys/r600/drm/r600_bomgr.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright 2010 VMWare. - * Copyright 2010 Red Hat Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Jose Fonseca - * Thomas Hellström - * Jerome Glisse - */ -#include "r600_priv.h" -#include "util/u_memory.h" -#include "util/u_time.h" - -static void r600_bomgr_timeout_flush(struct r600_bomgr *mgr) -{ - struct r600_bo *bo, *tmp; - int64_t now; - - now = os_time_get(); - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { - if(!os_time_timeout(bo->start, bo->end, now)) - break; - - mgr->num_delayed--; - bo->manager_id = 0; - LIST_DEL(&bo->list); - r600_bo_destroy(mgr->radeon, bo); - } -} - -static INLINE int r600_bo_is_compat(struct r600_bomgr *mgr, - struct r600_bo *bo, - unsigned size, - unsigned alignment, - unsigned cfence) -{ - if(bo->size < size) { - return 0; - } - - /* be lenient with size */ - if(bo->size >= 2*size) { - return 0; - } - - if(!pb_check_alignment(alignment, bo->alignment)) { - return 0; - } - - if (!fence_is_after(cfence, bo->fence)) { - return 0; - } - - return 1; -} - -struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr, - unsigned size, - unsigned alignment, - unsigned cfence) -{ - struct r600_bo *bo, *tmp; - int64_t now; - - - pipe_mutex_lock(mgr->mutex); - - now = os_time_get(); - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { - if(r600_bo_is_compat(mgr, bo, size, alignment, cfence)) { - LIST_DEL(&bo->list); - --mgr->num_delayed; - r600_bomgr_timeout_flush(mgr); - pipe_mutex_unlock(mgr->mutex); - LIST_INITHEAD(&bo->list); - pipe_reference_init(&bo->reference, 1); - return bo; - } - - if(os_time_timeout(bo->start, bo->end, now)) { - mgr->num_delayed--; - bo->manager_id = 0; - LIST_DEL(&bo->list); - r600_bo_destroy(mgr->radeon, bo); - } - } - - pipe_mutex_unlock(mgr->mutex); - return NULL; -} - -void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo) -{ - LIST_INITHEAD(&bo->list); - bo->manager_id = 1; -} - -boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo) -{ - bo->start = os_time_get(); - bo->end = bo->start + mgr->usecs; - pipe_mutex_lock(mgr->mutex); - 
LIST_ADDTAIL(&bo->list, &mgr->delayed); - ++mgr->num_delayed; - pipe_mutex_unlock(mgr->mutex); - return FALSE; -} - -void r600_bomgr_destroy(struct r600_bomgr *mgr) -{ - struct r600_bo *bo, *tmp; - - pipe_mutex_lock(mgr->mutex); - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { - mgr->num_delayed--; - bo->manager_id = 0; - LIST_DEL(&bo->list); - r600_bo_destroy(mgr->radeon, bo); - } - pipe_mutex_unlock(mgr->mutex); - - FREE(mgr); -} - -struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs) -{ - struct r600_bomgr *mgr; - - mgr = CALLOC_STRUCT(r600_bomgr); - if (mgr == NULL) - return NULL; - - mgr->radeon = radeon; - mgr->usecs = usecs; - LIST_INITHEAD(&mgr->delayed); - mgr->num_delayed = 0; - pipe_mutex_init(mgr->mutex); - - return mgr; -} diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index f2e46d3d0d9..390f51a561c 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -305,10 +305,6 @@ struct radeon *radeon_create(struct radeon_winsys *ws) radeon_get_backend_map(radeon); } - radeon->bomgr = r600_bomgr_create(radeon, 1000000); - if (radeon->bomgr == NULL) { - return NULL; - } r = radeon_init_fence(radeon); if (r) { radeon_destroy(radeon); @@ -327,9 +323,6 @@ struct radeon *radeon_destroy(struct radeon *radeon) r600_bo_reference(radeon, &radeon->fence_bo, NULL); } - if (radeon->bomgr) - r600_bomgr_destroy(radeon->bomgr); - FREE(radeon); return NULL; } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 621c7eb2158..99c238e04e6 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -34,7 +34,6 @@ #define PKT_COUNT_C 0xC000FFFF #define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) -struct r600_bomgr; struct r600_bo; struct radeon { @@ -43,7 +42,6 @@ struct radeon { unsigned family; enum chip_class chip_class; struct r600_tiling_info tiling_info; - struct 
r600_bomgr *bomgr; unsigned fence; unsigned *cfence; struct r600_bo *fence_bo; @@ -94,21 +92,6 @@ struct r600_bo { unsigned domains; struct radeon_bo *bo; unsigned fence; - /* manager data */ - struct list_head list; - unsigned manager_id; - unsigned alignment; - unsigned offset; - int64_t start; - int64_t end; -}; - -struct r600_bomgr { - struct radeon *radeon; - unsigned usecs; - pipe_mutex mutex; - struct list_head delayed; - unsigned num_delayed; }; /* @@ -174,23 +157,6 @@ static INLINE void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, str */ void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); -/* - * r600_bomgr.c - */ -struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs); -void r600_bomgr_destroy(struct r600_bomgr *mgr); -boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo); -void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo); -struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr, - unsigned size, - unsigned alignment, - unsigned cfence); - - -/* - * helpers - */ - /* * radeon_bo.c From cdbb8a195a3581faa7b569997fa84fb9bceffaa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 01:47:57 +0200 Subject: [PATCH 316/600] r600g: remove now-unused r600_bo::size Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_bo.c | 2 -- src/gallium/winsys/r600/drm/r600_priv.h | 1 - 2 files changed, 3 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 4918897be67..9fc799a1586 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -66,7 +66,6 @@ struct r600_bo *r600_bo(struct radeon *radeon, } bo = calloc(1, sizeof(struct r600_bo)); - bo->size = size; bo->domains = domains; bo->bo = rbo; @@ -88,7 +87,6 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan } pipe_reference_init(&bo->reference, 1); - bo->size = 
rbo->size; bo->domains = (RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 99c238e04e6..0b757a84953 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -88,7 +88,6 @@ struct radeon_bo { struct r600_bo { struct pipe_reference reference; /* this must be the first member for the r600_bo_reference inline to work */ /* DO NOT MOVE THIS ^ */ - unsigned size; unsigned domains; struct radeon_bo *bo; unsigned fence; From 591d8c33502463b816428d18ca779faa282a5a25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 01:59:02 +0200 Subject: [PATCH 317/600] r600g: remove the fences which were used for the cache buffer manager Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_drm.c | 23 ------ src/gallium/winsys/r600/drm/r600_hw_context.c | 77 +------------------ src/gallium/winsys/r600/drm/r600_priv.h | 12 +-- src/gallium/winsys/r600/drm/radeon_bo.c | 43 ++--------- 4 files changed, 10 insertions(+), 145 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 390f51a561c..f0ef55e98d5 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -221,21 +221,8 @@ static int radeon_get_backend_map(struct radeon *radeon) return 0; } -static int radeon_init_fence(struct radeon *radeon) -{ - radeon->fence = 1; - radeon->fence_bo = r600_bo(radeon, 4096, 0, 0, 0); - if (radeon->fence_bo == NULL) { - return -ENOMEM; - } - radeon->cfence = r600_bo_map(radeon, radeon->fence_bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL); - *radeon->cfence = 0; - return 0; -} - struct radeon *radeon_create(struct radeon_winsys *ws) { - int r; struct radeon *radeon = CALLOC_STRUCT(radeon); if (radeon == NULL) { return NULL; @@ -305,12 +292,6 @@ struct radeon *radeon_create(struct radeon_winsys *ws) 
radeon_get_backend_map(radeon); } - r = radeon_init_fence(radeon); - if (r) { - radeon_destroy(radeon); - return NULL; - } - return radeon; } @@ -319,10 +300,6 @@ struct radeon *radeon_destroy(struct radeon *radeon) if (radeon == NULL) return NULL; - if (radeon->fence_bo) { - r600_bo_reference(radeon, &radeon->fence_bo, NULL); - } - FREE(radeon); return NULL; } diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 8ab4d94a6ee..e125fc82e3c 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -143,32 +143,6 @@ void r600_init_cs(struct r600_context *ctx) ctx->init_dwords = ctx->pm4_cdwords; } -static void INLINE r600_context_update_fenced_list(struct r600_context *ctx) -{ - for (int i = 0; i < ctx->creloc; i++) { - if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist)) - LIST_DELINIT(&ctx->bo[i]->fencedlist); - LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo); - ctx->bo[i]->fence = ctx->radeon->fence; - ctx->bo[i]->ctx = ctx; - } -} - -static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsigned fence) -{ - struct radeon_bo *bo = NULL; - struct radeon_bo *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { - if (bo->fence <= *ctx->radeon->cfence) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - } else { - bo->fence = fence; - } - } -} - static void r600_init_block(struct r600_context *ctx, struct r600_block *block, const struct r600_reg *reg, int index, int nreg, @@ -757,17 +731,6 @@ static int r600_loop_const_init(struct r600_context *ctx, u32 offset) return r600_context_add_block(ctx, r600_loop_consts, nreg, PKT3_SET_LOOP_CONST, R600_LOOP_CONST_OFFSET); } -static void r600_context_clear_fenced_bo(struct r600_context *ctx) -{ - struct radeon_bo *bo, *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - bo->ctx = NULL; - } -} - 
static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks) { struct r600_block *block; @@ -817,7 +780,6 @@ void r600_context_fini(struct r600_context *ctx) free(ctx->bo); free(ctx->pm4); - r600_context_clear_fenced_bo(ctx); memset(ctx, 0, sizeof(struct r600_context)); } @@ -1058,7 +1020,6 @@ void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo) ctx->reloc[ctx->creloc].write_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); ctx->reloc[ctx->creloc].flags = 0; radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo); - rbo->fence = ctx->radeon->fence; ctx->creloc++; } @@ -1138,7 +1099,6 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat /* find relocation */ reloc_id = block->pm4_bo_index[id]; r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, reg->bo); - reg->bo->fence = ctx->radeon->fence; /* always force dirty for relocs for now */ dirty |= R600_BLOCK_STATUS_DIRTY; } @@ -1205,31 +1165,21 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } } - if (!dirty) { - if (is_vertex) - state->bo[0]->fence = ctx->radeon->fence; - else { - state->bo[0]->fence = ctx->radeon->fence; - state->bo[1]->fence = ctx->radeon->fence; - } - } else { + + if (dirty) { if (is_vertex) { /* VERTEX RESOURCE, we preted there is 2 bo to relocate so * we have single case btw VERTEX & TEXTURE resource */ r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); - state->bo[0]->fence = ctx->radeon->fence; } else { /* TEXTURE RESOURCE */ r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]); - state->bo[0]->fence = ctx->radeon->fence; - state->bo[1]->fence = ctx->radeon->fence; state->bo[0]->bo->binding |= BO_BOUND_TEXTURE; } - } - if (dirty) { + if 
(is_vertex) block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX; else @@ -1574,7 +1524,6 @@ void r600_context_flush(struct r600_context *ctx) struct drm_radeon_cs drmib = {}; struct drm_radeon_cs_chunk chunks[2]; uint64_t chunk_array[2]; - unsigned fence; int r; struct r600_block *enable_block = NULL; @@ -1592,16 +1541,6 @@ void r600_context_flush(struct r600_context *ctx) /* partial flush is needed to avoid lockups on some chips with user fences */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - /* emit fence */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); - ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); - ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence; - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo); #if 1 /* emit cs */ @@ -1625,16 +1564,6 @@ void r600_context_flush(struct r600_context *ctx) *ctx->radeon->cfence = ctx->radeon->fence; #endif - r600_context_update_fenced_list(ctx); - - fence = ctx->radeon->fence + 1; - if (fence < ctx->radeon->fence) { - /* wrap around */ - fence = 1; - r600_context_fence_wraparound(ctx, fence); - } - ctx->radeon->fence = fence; - /* restart */ for (int i = 0; i < ctx->creloc; i++) { ctx->bo[i]->reloc = NULL; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 0b757a84953..49d3060bbf2 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -34,17 +34,12 @@ #define PKT_COUNT_C 0xC000FFFF #define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) -struct r600_bo; - struct radeon { struct radeon_winsys *ws; struct 
radeon_info info; unsigned family; enum chip_class chip_class; struct r600_tiling_info tiling_info; - unsigned fence; - unsigned *cfence; - struct r600_bo *fence_bo; unsigned num_tile_pipes; unsigned backend_map; boolean backend_map_valid; @@ -70,15 +65,11 @@ struct r600_reg { struct radeon_bo { struct pipe_reference reference; struct pb_buffer *buf; - unsigned handle; unsigned size; int map_count; void *data; - struct list_head fencedlist; - unsigned fence; - struct r600_context *ctx; - boolean shared; + struct r600_reloc *reloc; unsigned reloc_id; unsigned last_flush; @@ -90,7 +81,6 @@ struct r600_bo { /* DO NOT MOVE THIS ^ */ unsigned domains; struct radeon_bo *bo; - unsigned fence; }; /* diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 7ff4806c056..63dc44ddb44 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -83,35 +83,24 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, if (bo == NULL) { return NULL; } - bo->size = size; - bo->handle = handle; pipe_reference_init(&bo->reference, 1); - LIST_INITHEAD(&bo->fencedlist); if (handle) { - unsigned size; bo->buf = radeon->ws->buffer_from_handle(radeon->ws, &whandle, NULL, &size); - if (!bo->buf) { - FREE(bo); - return NULL; - } - bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf); - bo->size = size; - bo->shared = TRUE; } else { bo->buf = radeon->ws->buffer_create(radeon->ws, size, alignment, bind, initial_domain); - if (!bo->buf) { - FREE(bo); - return NULL; - } - bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf); } + if (!bo->buf) { + FREE(bo); + return NULL; + } + bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf); + bo->size = size; return bo; } static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo) { - LIST_DEL(&bo->fencedlist); radeon_bo_fixed_unmap(radeon, bo); pb_reference(&bo->buf, NULL); FREE(bo); @@ -133,16 +122,6 @@ int radeon_bo_wait(struct 
radeon *radeon, struct radeon_bo *bo) struct drm_radeon_gem_wait_idle args; int ret; - if (!bo->shared) { - if (!bo->fence) - return 0; - if (bo->fence <= *radeon->cfence) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - return 0; - } - } - /* Zero out args to make valgrind happy */ memset(&args, 0, sizeof(args)); args.handle = bo->handle; @@ -158,16 +137,6 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain struct drm_radeon_gem_busy args; int ret; - if (!bo->shared) { - if (!bo->fence) - return 0; - if (bo->fence <= *radeon->cfence) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - return 0; - } - } - memset(&args, 0, sizeof(args)); args.handle = bo->handle; args.domain = 0; From 9865b1ec8c88154d8ceb982c42844b68e9224217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 02:24:15 +0200 Subject: [PATCH 318/600] r600g: remove now-unused r600_context::fenced_bo Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 2 +- src/gallium/winsys/r600/drm/evergreen_hw_context.c | 4 ---- src/gallium/winsys/r600/drm/r600_hw_context.c | 4 ---- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 7e704730122..043215b3ec7 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -254,11 +254,11 @@ struct r600_context { unsigned creloc; struct r600_reloc *reloc; struct radeon_bo **bo; + u32 *pm4; struct list_head query_list; unsigned num_query_running; unsigned backend_mask; - struct list_head fenced_bo; unsigned max_db; /* for OQ */ unsigned num_dest_buffers; unsigned flags; diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 94206d5568b..df89047ebc5 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -1008,13 +1008,9 @@ int 
evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; - ctx->max_db = 8; - LIST_INITHEAD(&ctx->fenced_bo); - r600_get_backend_mask(ctx); - return 0; out_err: r600_context_fini(ctx); diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index e125fc82e3c..4d8bb184cbd 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -934,13 +934,9 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; - - LIST_INITHEAD(&ctx->fenced_bo); - ctx->max_db = 4; r600_get_backend_mask(ctx); - return 0; out_err: r600_context_fini(ctx); From 638d75185e66727faaba5dc2df1b6e14c7c0c075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 04:31:02 +0200 Subject: [PATCH 319/600] r600g: let radeon_winsys maintain the list of relocations Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 3 +- .../winsys/r600/drm/evergreen_hw_context.c | 14 ++----- src/gallium/winsys/r600/drm/r600_hw_context.c | 37 +++++++---------- src/gallium/winsys/r600/drm/r600_priv.h | 1 + src/gallium/winsys/r600/drm/radeon_bo.c | 1 + src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 40 +++++++++++++++---- src/gallium/winsys/radeon/drm/radeon_winsys.h | 5 +++ 7 files changed, 59 insertions(+), 42 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 043215b3ec7..3ac60bce611 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -239,6 +239,7 @@ struct r600_query { struct r600_context { struct radeon *radeon; + struct radeon_winsys_cs *cs; struct r600_range *range; unsigned nblocks; struct r600_block **blocks; @@ -250,7 +251,7 @@ struct r600_context { unsigned pm4_dirty_cdwords; unsigned 
ctx_pm4_ndwords; unsigned init_dwords; - unsigned nreloc; + unsigned creloc; struct r600_reloc *reloc; struct radeon_bo **bo; diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index df89047ebc5..3f6f8b5368b 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -986,14 +986,10 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; + ctx->cs = radeon->ws->cs_create(radeon->ws); + /* allocate cs variables */ - ctx->nreloc = RADEON_CTX_MAX_PM4; - ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); - if (ctx->reloc == NULL) { - r = -ENOMEM; - goto out_err; - } - ctx->bo = calloc(ctx->nreloc, sizeof(void *)); + ctx->bo = calloc(RADEON_CTX_MAX_PM4, sizeof(void *)); if (ctx->bo == NULL) { r = -ENOMEM; goto out_err; @@ -1146,10 +1142,6 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr if (draw->indices) { ndwords = 11; - /* make sure there is enough relocation space before scheduling draw */ - if (ctx->creloc >= (ctx->nreloc - 1)) { - r600_context_flush(ctx); - } } /* queries need some special values */ diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 4d8bb184cbd..bba55d68267 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -776,9 +776,9 @@ void r600_context_fini(struct r600_context *ctx) r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources); free(ctx->range); free(ctx->blocks); - free(ctx->reloc); free(ctx->bo); free(ctx->pm4); + ctx->radeon->ws->cs_destroy(ctx->cs); memset(ctx, 0, sizeof(struct r600_context)); } @@ -912,14 +912,10 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; + ctx->cs = radeon->ws->cs_create(radeon->ws); + /* allocate cs variables */ - 
ctx->nreloc = RADEON_CTX_MAX_PM4; - ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); - if (ctx->reloc == NULL) { - r = -ENOMEM; - goto out_err; - } - ctx->bo = calloc(ctx->nreloc, sizeof(void *)); + ctx->bo = calloc(RADEON_CTX_MAX_PM4, sizeof(void *)); if (ctx->bo == NULL) { r = -ENOMEM; goto out_err; @@ -1009,14 +1005,15 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo) { struct radeon_bo *bo = rbo->bo; - bo->reloc = &ctx->reloc[ctx->creloc]; - bo->reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4; - ctx->reloc[ctx->creloc].handle = bo->handle; - ctx->reloc[ctx->creloc].read_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); - ctx->reloc[ctx->creloc].write_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); - ctx->reloc[ctx->creloc].flags = 0; - radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo); - ctx->creloc++; + + unsigned reloc_index = ctx->radeon->ws->trans_add_reloc(ctx->cs, bo->cs_buf, + rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM), + rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM), + (void**)&ctx->reloc, &ctx->creloc); + + bo->reloc = (void*)1; + bo->reloc_id = reloc_index * 4; + radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo); } void r600_context_reg(struct r600_context *ctx, @@ -1444,10 +1441,6 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) if (draw->indices) { ndwords = 11; - /* make sure there is enough relocation space before scheduling draw */ - if (ctx->creloc >= (ctx->nreloc - 1)) { - r600_context_flush(ctx); - } } /* queries need some special values */ @@ -1570,6 +1563,7 @@ void r600_context_flush(struct r600_context *ctx) ctx->pm4_dirty_cdwords = 0; ctx->pm4_cdwords = 0; ctx->flags = 0; + ctx->radeon->ws->cs_flush(ctx->cs, 0); r600_init_cs(ctx); @@ -1601,8 +1595,7 @@ void 
r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, { unsigned ndwords = 10; - if (((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) || - (ctx->creloc >= (ctx->nreloc - 1))) { + if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ r600_context_flush(ctx); } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 49d3060bbf2..baf7c98d578 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -65,6 +65,7 @@ struct r600_reg { struct radeon_bo { struct pipe_reference reference; struct pb_buffer *buf; + struct radeon_winsys_cs_handle *cs_buf; unsigned handle; unsigned size; int map_count; diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 63dc44ddb44..536bbe45bff 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -94,6 +94,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, FREE(bo); return NULL; } + bo->cs_buf = radeon->ws->buffer_get_cs_handle(bo->buf); bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf); bo->size = size; return bo; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 1b30b95a318..9a1e16957d2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -219,11 +219,11 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) return -1; } -static void radeon_add_reloc(struct radeon_cs_context *csc, - struct radeon_bo *bo, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd, - enum radeon_bo_domain *added_domains) +static unsigned radeon_add_reloc(struct radeon_cs_context *csc, + struct radeon_bo *bo, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd, + enum radeon_bo_domain *added_domains) { struct 
drm_radeon_cs_reloc *reloc; unsigned i; @@ -233,7 +233,7 @@ static void radeon_add_reloc(struct radeon_cs_context *csc, reloc = csc->relocs_hashlist[hash]; if (reloc->handle == bo->handle) { update_domains(reloc, rd, wd, added_domains); - return; + return csc->reloc_indices_hashlist[hash]; } /* Hash collision, look for the BO in the list of relocs linearly. */ @@ -246,7 +246,7 @@ static void radeon_add_reloc(struct radeon_cs_context *csc, csc->relocs_hashlist[hash] = reloc; csc->reloc_indices_hashlist[hash] = i; /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ - return; + return i; } } } @@ -280,9 +280,9 @@ static void radeon_add_reloc(struct radeon_cs_context *csc, csc->reloc_indices_hashlist[hash] = csc->crelocs; csc->chunks[1].length_dw += RELOC_DWORDS; - csc->crelocs++; *added_domains = rd | wd; + return csc->crelocs++; } static void radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, @@ -470,6 +470,28 @@ static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs, return radeon_bo_is_referenced_by_cs(cs, bo); } +static unsigned trans_add_reloc(struct radeon_winsys_cs *rcs, + struct radeon_winsys_cs_handle *buf, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd, + void **reloc_list, unsigned *reloc_count) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_bo *bo = (struct radeon_bo*)buf; + enum radeon_bo_domain added_domains; + + unsigned index = radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains); + + if (added_domains & RADEON_DOMAIN_GTT) + cs->csc->used_gart += bo->size; + if (added_domains & RADEON_DOMAIN_VRAM) + cs->csc->used_vram += bo->size; + + *reloc_list = cs->csc->relocs; + *reloc_count = cs->csc->crelocs; + return index; +} + void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) { ws->base.cs_create = radeon_drm_cs_create; @@ -480,4 +502,6 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_flush = radeon_drm_cs_flush; ws->base.cs_set_flush = 
radeon_drm_cs_set_flush; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; + + ws->base.trans_add_reloc = trans_add_reloc; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 2e5000702e0..8e81fa1e301 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -329,6 +329,11 @@ struct radeon_winsys { /* Transitional functions for r600g when moving to winsys/radeon */ unsigned (*trans_get_buffer_handle)(struct pb_buffer *buf); + unsigned (*trans_add_reloc)(struct radeon_winsys_cs *cs, + struct radeon_winsys_cs_handle *buf, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd, + void **reloc_list, unsigned *reloc_count); }; #endif From 3e579722167dea997f547970e2b62d4594875b98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 05:15:36 +0200 Subject: [PATCH 320/600] r600g: remove reloc-related variables from radeon_bo Reviewed-by: Alex Deucher --- .../winsys/r600/drm/evergreen_hw_context.c | 3 +- src/gallium/winsys/r600/drm/r600_hw_context.c | 45 +++++-------------- src/gallium/winsys/r600/drm/r600_priv.h | 18 ++++---- 3 files changed, 22 insertions(+), 44 deletions(-) diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 3f6f8b5368b..cd63c8db156 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -1192,8 +1192,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - pm4[10] = 0; - r600_context_bo_reloc(ctx, &pm4[10], draw->indices); + pm4[10] = r600_context_bo_reloc(ctx, draw->indices); } else { pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); pm4[5] = draw->vgt_num_indices; diff --git 
a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index bba55d68267..5dd079f62ac 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -86,8 +86,7 @@ void r600_get_backend_mask(struct r600_context *ctx) ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer); /* execute */ r600_context_flush(ctx); @@ -997,25 +996,11 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id; + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, rbo); } bo->last_flush = (bo->last_flush | flush_flags) & flush_mask; } -void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo) -{ - struct radeon_bo *bo = rbo->bo; - - unsigned reloc_index = ctx->radeon->ws->trans_add_reloc(ctx->cs, bo->cs_buf, - rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM), - rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM), - (void**)&ctx->reloc, &ctx->creloc); - - bo->reloc = (void*)1; - bo->reloc_id = reloc_index * 4; - radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo); -} - void r600_context_reg(struct r600_context *ctx, unsigned offset, unsigned value, unsigned mask) @@ -1321,9 +1306,8 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block * if (block->pm4_bo_index[j]) { /* find relocation */ id = block->pm4_bo_index[j]; - r600_context_bo_reloc(ctx, - &block->pm4[block->reloc[id].bo_pm4_index], - block->reloc[id].bo); + block->pm4[block->reloc[id].bo_pm4_index] = + r600_context_bo_reloc(ctx, 
block->reloc[id].bo); r600_context_bo_flush(ctx, block->reloc[id].flush_flags, block->reloc[id].flush_mask, @@ -1376,9 +1360,8 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60 if (block->pm4_bo_index[j]) { /* find relocation */ id = block->pm4_bo_index[j]; - r600_context_bo_reloc(ctx, - &block->pm4[block->reloc[id].bo_pm4_index], - block->reloc[id].bo); + block->pm4[block->reloc[id].bo_pm4_index] = + r600_context_bo_reloc(ctx, block->reloc[id].bo); r600_context_bo_flush(ctx, block->reloc[id].flush_flags, block->reloc[id].flush_mask, @@ -1493,8 +1476,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - pm4[10] = 0; - r600_context_bo_reloc(ctx, &pm4[10], draw->indices); + pm4[10] = r600_context_bo_reloc(ctx, draw->indices); } else { pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); pm4[5] = draw->vgt_num_indices; @@ -1555,7 +1537,6 @@ void r600_context_flush(struct r600_context *ctx) /* restart */ for (int i = 0; i < ctx->creloc; i++) { - ctx->bo[i]->reloc = NULL; ctx->bo[i]->last_flush = 0; radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL); } @@ -1609,8 +1590,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, ctx->pm4[ctx->pm4_cdwords++] = value; /* DATA_LO */ ctx->pm4[ctx->pm4_cdwords++] = 0; /* DATA_HI */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], fence_bo); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo); } static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait) @@ -1721,8 +1701,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - 
ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer); query->state |= R600_QUERY_STATE_STARTED; query->state ^= R600_QUERY_STATE_ENDED; @@ -1746,8 +1725,7 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer); query->results_end += query->result_size; if (query->results_end >= query->buffer_size) @@ -1792,8 +1770,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, ctx->pm4[ctx->pm4_cdwords++] = results_base; ctx->pm4[ctx->pm4_cdwords++] = op; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer); results_base += query->result_size; if (results_base >= query->buffer_size) results_base = 0; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index baf7c98d578..930cf81813a 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -30,6 +30,7 @@ #include "../../radeon/drm/radeon_winsys.h" #include "util/u_hash_table.h" #include "os/os_thread.h" +#include "radeon_drm.h" #define PKT_COUNT_C 0xC000FFFF #define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) @@ -71,8 +72,6 @@ struct radeon_bo { int map_count; void *data; - struct r600_reloc *reloc; - unsigned reloc_id; unsigned last_flush; unsigned binding; }; @@ -111,7 +110,6 @@ int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo); * r600_hw_context.c */ int r600_context_init_fence(struct 
r600_context *ctx); -void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo); void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, unsigned flush_mask, struct r600_bo *rbo); struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset); @@ -129,17 +127,21 @@ void r600_context_reg(struct r600_context *ctx, void r600_init_cs(struct r600_context *ctx); int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base); -static INLINE void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo) +static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo) { struct radeon_bo *bo = rbo->bo; + unsigned reloc_index; assert(bo != NULL); - if (!bo->reloc) - r600_context_get_reloc(ctx, rbo); + reloc_index = ctx->radeon->ws->trans_add_reloc( + ctx->cs, bo->cs_buf, + rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM), + rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM), + (void**)&ctx->reloc, &ctx->creloc); - /* set PKT3 to point to proper reloc */ - *pm4 = bo->reloc_id; + radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo); + return reloc_index * 4; } /* From 1acaf09778323ed6a2b0a0de5acb7731622a82bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 3 Aug 2011 19:27:49 +0200 Subject: [PATCH 321/600] r600g: don't use RADEON_GEM_DOMAIN_CPU Also staging resources shouldn't be allocated with the initial domain being VRAM. 
Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_bo.c | 41 ++++++++++++------------- src/gallium/winsys/r600/drm/r600_priv.h | 3 +- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 9fc799a1586..0e41a1709ee 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -40,26 +40,27 @@ struct r600_bo *r600_bo(struct radeon *radeon, * and are used for uploads and downloads from regular * resources. We generate them internally for some transfers. */ - if (usage == PIPE_USAGE_STAGING) - domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT; - else - domains = (RADEON_GEM_DOMAIN_CPU | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_VRAM); - - switch(usage) { - case PIPE_USAGE_DYNAMIC: - case PIPE_USAGE_STREAM: - case PIPE_USAGE_STAGING: + if (usage == PIPE_USAGE_STAGING) { + domains = RADEON_GEM_DOMAIN_GTT; initial_domain = RADEON_GEM_DOMAIN_GTT; - break; - case PIPE_USAGE_DEFAULT: - case PIPE_USAGE_STATIC: - case PIPE_USAGE_IMMUTABLE: - default: - initial_domain = RADEON_GEM_DOMAIN_VRAM; - break; + } else { + domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; + + switch(usage) { + case PIPE_USAGE_DYNAMIC: + case PIPE_USAGE_STREAM: + case PIPE_USAGE_STAGING: + initial_domain = RADEON_GEM_DOMAIN_GTT; + break; + case PIPE_USAGE_DEFAULT: + case PIPE_USAGE_STATIC: + case PIPE_USAGE_IMMUTABLE: + default: + initial_domain = RADEON_GEM_DOMAIN_VRAM; + break; + } } + rbo = radeon_bo(radeon, 0, size, alignment, binding, initial_domain); if (rbo == NULL) { return NULL; @@ -87,9 +88,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan } pipe_reference_init(&bo->reference, 1); - bo->domains = (RADEON_GEM_DOMAIN_CPU | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_VRAM); + bo->domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; if (stride) *stride = whandle->stride; diff --git 
a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 930cf81813a..5bb515d743e 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -136,8 +136,7 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6 reloc_index = ctx->radeon->ws->trans_add_reloc( ctx->cs, bo->cs_buf, - rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM), - rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM), + rbo->domains, rbo->domains, (void**)&ctx->reloc, &ctx->creloc); radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo); From 685b8345a8ff69460f0c62c678493527b59b18a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 00:15:54 +0200 Subject: [PATCH 322/600] r600g: remove struct r600_reloc That is really private to winsys/radeon. Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 15 ++------------- src/gallium/winsys/r600/drm/r600_hw_context.c | 2 +- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 3ac60bce611..0562b6da31e 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -190,18 +190,6 @@ struct r600_range { struct r600_block **blocks; }; -/* - * relocation - */ -#pragma pack(1) -struct r600_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - /* * query */ @@ -240,6 +228,7 @@ struct r600_query { struct r600_context { struct radeon *radeon; struct radeon_winsys_cs *cs; + struct r600_range *range; unsigned nblocks; struct r600_block **blocks; @@ -253,7 +242,7 @@ struct r600_context { unsigned init_dwords; unsigned creloc; - struct r600_reloc *reloc; + unsigned *reloc; struct radeon_bo **bo; u32 *pm4; diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 
5dd079f62ac..1950e583121 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1521,7 +1521,7 @@ void r600_context_flush(struct r600_context *ctx) chunks[0].length_dw = ctx->pm4_cdwords; chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4; chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; - chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4; + chunks[1].length_dw = ctx->creloc * 4; chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc; chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0]; chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1]; From e6fb62594fca714883af9bba9795be8838c16900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 01:37:33 +0200 Subject: [PATCH 323/600] r600g: emit CS using radeon_winsys Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 4 +- .../winsys/r600/drm/evergreen_hw_context.c | 6 +-- src/gallium/winsys/r600/drm/r600_drm.c | 3 ++ src/gallium/winsys/r600/drm/r600_hw_context.c | 39 ++++--------------- src/gallium/winsys/r600/drm/r600_priv.h | 9 +++-- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 5 +-- src/gallium/winsys/radeon/drm/radeon_winsys.h | 3 +- 7 files changed, 20 insertions(+), 49 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 0562b6da31e..84cfa2a17e6 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -236,16 +236,16 @@ struct r600_context { struct list_head resource_dirty; struct list_head enable_list; unsigned pm4_ndwords; - unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; unsigned ctx_pm4_ndwords; unsigned init_dwords; unsigned creloc; - unsigned *reloc; struct radeon_bo **bo; u32 *pm4; + unsigned pm4_cdwords; + struct list_head query_list; unsigned num_query_running; unsigned backend_mask; diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 
cd63c8db156..29da7bea4c6 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -995,11 +995,7 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) goto out_err; } ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; - ctx->pm4 = calloc(ctx->pm4_ndwords, 4); - if (ctx->pm4 == NULL) { - r = -ENOMEM; - goto out_err; - } + ctx->pm4 = ctx->cs->buf; r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index f0ef55e98d5..270a07a3a89 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -292,6 +292,9 @@ struct radeon *radeon_create(struct radeon_winsys *ws) radeon_get_backend_map(radeon); } + /* XXX disable ioctl thread offloading until the porting is done. */ + setenv("RADEON_THREAD", "0", 0); + return radeon; } diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 1950e583121..59450b5ba83 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -776,7 +776,6 @@ void r600_context_fini(struct r600_context *ctx) free(ctx->range); free(ctx->blocks); free(ctx->bo); - free(ctx->pm4); ctx->radeon->ws->cs_destroy(ctx->cs); memset(ctx, 0, sizeof(struct r600_context)); @@ -920,11 +919,7 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) goto out_err; } ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; - ctx->pm4 = calloc(ctx->pm4_ndwords, 4); - if (ctx->pm4 == NULL) { - r = -ENOMEM; - goto out_err; - } + ctx->pm4 = ctx->cs->buf; r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ @@ -1492,10 +1487,6 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) void r600_context_flush(struct r600_context *ctx) { - struct drm_radeon_cs drmib = {}; - struct drm_radeon_cs_chunk chunks[2]; - uint64_t 
chunk_array[2]; - int r; struct r600_block *enable_block = NULL; if (ctx->pm4_cdwords == ctx->init_dwords) @@ -1513,27 +1504,12 @@ void r600_context_flush(struct r600_context *ctx) ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); -#if 1 - /* emit cs */ - drmib.num_chunks = 2; - drmib.chunks = (uint64_t)(uintptr_t)chunk_array; - chunks[0].chunk_id = RADEON_CHUNK_ID_IB; - chunks[0].length_dw = ctx->pm4_cdwords; - chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4; - chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; - chunks[1].length_dw = ctx->creloc * 4; - chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc; - chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0]; - chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1]; - r = drmCommandWriteRead(ctx->radeon->info.fd, DRM_RADEON_CS, &drmib, - sizeof(struct drm_radeon_cs)); - if (r) { - fprintf(stderr, "radeon: The kernel rejected CS, " - "see dmesg for more information.\n"); - } -#else - *ctx->radeon->cfence = ctx->radeon->fence; -#endif + /* Flush the CS. */ + ctx->cs->cdw = ctx->pm4_cdwords; + ctx->radeon->ws->cs_flush(ctx->cs, 0); + /* We need to get the pointer to the other CS, + * the command streams are double-buffered. 
*/ + ctx->pm4 = ctx->cs->buf; /* restart */ for (int i = 0; i < ctx->creloc; i++) { @@ -1544,7 +1520,6 @@ void r600_context_flush(struct r600_context *ctx) ctx->pm4_dirty_cdwords = 0; ctx->pm4_cdwords = 0; ctx->flags = 0; - ctx->radeon->ws->cs_flush(ctx->cs, 0); r600_init_cs(ctx); diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 5bb515d743e..83e964b7f69 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -134,10 +134,11 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6 assert(bo != NULL); - reloc_index = ctx->radeon->ws->trans_add_reloc( - ctx->cs, bo->cs_buf, - rbo->domains, rbo->domains, - (void**)&ctx->reloc, &ctx->creloc); + reloc_index = + ctx->radeon->ws->trans_add_reloc(ctx->cs, bo->cs_buf, rbo->domains, rbo->domains); + + if (reloc_index >= ctx->creloc) + ctx->creloc = reloc_index+1; radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo); return reloc_index * 4; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 9a1e16957d2..09befb39b11 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -473,8 +473,7 @@ static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs, static unsigned trans_add_reloc(struct radeon_winsys_cs *rcs, struct radeon_winsys_cs_handle *buf, enum radeon_bo_domain rd, - enum radeon_bo_domain wd, - void **reloc_list, unsigned *reloc_count) + enum radeon_bo_domain wd) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; @@ -487,8 +486,6 @@ static unsigned trans_add_reloc(struct radeon_winsys_cs *rcs, if (added_domains & RADEON_DOMAIN_VRAM) cs->csc->used_vram += bo->size; - *reloc_list = cs->csc->relocs; - *reloc_count = cs->csc->crelocs; return index; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h 
b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 8e81fa1e301..073b6aad79a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -332,8 +332,7 @@ struct radeon_winsys { unsigned (*trans_add_reloc)(struct radeon_winsys_cs *cs, struct radeon_winsys_cs_handle *buf, enum radeon_bo_domain rd, - enum radeon_bo_domain wd, - void **reloc_list, unsigned *reloc_count); + enum radeon_bo_domain wd); }; #endif From 03b25ad8ffd72f1f88b6c80a2ebfe3cf6e8a6390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 02:36:57 +0200 Subject: [PATCH 324/600] winsys/radeon: consolidate the add_reloc function Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_priv.h | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 36 ++++--------------- src/gallium/winsys/radeon/drm/radeon_winsys.h | 13 +++---- 3 files changed, 13 insertions(+), 38 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 83e964b7f69..54b66cc9e35 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -135,7 +135,7 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6 assert(bo != NULL); reloc_index = - ctx->radeon->ws->trans_add_reloc(ctx->cs, bo->cs_buf, rbo->domains, rbo->domains); + ctx->radeon->ws->cs_add_reloc(ctx->cs, bo->cs_buf, rbo->domains, rbo->domains); if (reloc_index >= ctx->creloc) ctx->creloc = reloc_index+1; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 09befb39b11..6da0ae67743 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -285,24 +285,23 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc, return csc->crelocs++; } -static void radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, - struct radeon_winsys_cs_handle 
*buf, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd) +static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, + struct radeon_winsys_cs_handle *buf, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; enum radeon_bo_domain added_domains; - radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains); - - if (!added_domains) - return; + unsigned index = radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains); if (added_domains & RADEON_DOMAIN_GTT) cs->csc->used_gart += bo->size; if (added_domains & RADEON_DOMAIN_VRAM) cs->csc->used_vram += bo->size; + + return index; } static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) @@ -470,25 +469,6 @@ static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs, return radeon_bo_is_referenced_by_cs(cs, bo); } -static unsigned trans_add_reloc(struct radeon_winsys_cs *rcs, - struct radeon_winsys_cs_handle *buf, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd) -{ - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - struct radeon_bo *bo = (struct radeon_bo*)buf; - enum radeon_bo_domain added_domains; - - unsigned index = radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains); - - if (added_domains & RADEON_DOMAIN_GTT) - cs->csc->used_gart += bo->size; - if (added_domains & RADEON_DOMAIN_VRAM) - cs->csc->used_vram += bo->size; - - return index; -} - void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) { ws->base.cs_create = radeon_drm_cs_create; @@ -499,6 +479,4 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_flush = radeon_drm_cs_flush; ws->base.cs_set_flush = radeon_drm_cs_set_flush; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; - - ws->base.trans_add_reloc = trans_add_reloc; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 073b6aad79a..6360d6a8401 100644 --- 
a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -259,11 +259,12 @@ struct radeon_winsys { * \param buf A winsys buffer to validate. * \param rd A read domain containing a bitmask of the RADEON_DOMAIN_* flags. * \param wd A write domain containing a bitmask of the RADEON_DOMAIN_* flags. + * \return Relocation index. */ - void (*cs_add_reloc)(struct radeon_winsys_cs *cs, - struct radeon_winsys_cs_handle *buf, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd); + unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs, + struct radeon_winsys_cs_handle *buf, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd); /** * Return TRUE if there is enough memory in VRAM and GTT for the relocs @@ -329,10 +330,6 @@ struct radeon_winsys { /* Transitional functions for r600g when moving to winsys/radeon */ unsigned (*trans_get_buffer_handle)(struct pb_buffer *buf); - unsigned (*trans_add_reloc)(struct radeon_winsys_cs *cs, - struct radeon_winsys_cs_handle *buf, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd); }; #endif From 1b542aca6e998e544a90ccff310f74b2811b8db0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 03:01:44 +0200 Subject: [PATCH 325/600] r600g: move more DRM queries into winsys/radeon Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_drm.c | 45 +------------------ src/gallium/winsys/r600/drm/r600_hw_context.c | 2 +- src/gallium/winsys/r600/drm/r600_priv.h | 3 -- .../winsys/radeon/drm/radeon_drm_winsys.c | 19 +++++++- src/gallium/winsys/radeon/drm/radeon_winsys.h | 3 ++ 5 files changed, 24 insertions(+), 48 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 270a07a3a89..a1b0ba1fb0f 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -68,12 +68,12 @@ unsigned r600_get_num_backends(struct radeon *radeon) unsigned r600_get_num_tile_pipes(struct 
radeon *radeon) { - return radeon->num_tile_pipes; + return radeon->info.r600_num_tile_pipes; } unsigned r600_get_backend_map(struct radeon *radeon) { - return radeon->backend_map; + return radeon->info.r600_backend_map; } unsigned r600_get_minor_version(struct radeon *radeon) @@ -185,42 +185,6 @@ static int radeon_drm_get_tiling(struct radeon *radeon) } } -static int radeon_get_num_tile_pipes(struct radeon *radeon) -{ - struct drm_radeon_info info = {}; - uint32_t num_tile_pipes = 0; - int r; - - info.request = RADEON_INFO_NUM_TILE_PIPES; - info.value = (uintptr_t)&num_tile_pipes; - r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - if (r) - return r; - - radeon->num_tile_pipes = num_tile_pipes; - return 0; -} - -static int radeon_get_backend_map(struct radeon *radeon) -{ - struct drm_radeon_info info = {}; - uint32_t backend_map = 0; - int r; - - info.request = RADEON_INFO_BACKEND_MAP; - info.value = (uintptr_t)&backend_map; - r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - if (r) - return r; - - radeon->backend_map = backend_map; - radeon->backend_map_valid = TRUE; - - return 0; -} - struct radeon *radeon_create(struct radeon_winsys *ws) { struct radeon *radeon = CALLOC_STRUCT(radeon); @@ -287,11 +251,6 @@ struct radeon *radeon_create(struct radeon_winsys *ws) if (radeon_drm_get_tiling(radeon)) return NULL; - if (radeon->info.drm_minor >= 11) { - radeon_get_num_tile_pipes(radeon); - radeon_get_backend_map(radeon); - } - /* XXX disable ioctl thread offloading until the porting is done. 
*/ setenv("RADEON_THREAD", "0", 0); diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 59450b5ba83..c72e8548de8 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -41,7 +41,7 @@ void r600_get_backend_mask(struct r600_context *ctx) unsigned i, mask = 0; /* if backend_map query is supported by the kernel */ - if (ctx->radeon->backend_map_valid) { + if (ctx->radeon->info.r600_backend_map_valid) { unsigned num_tile_pipes = r600_get_num_tile_pipes(ctx->radeon); unsigned backend_map = r600_get_backend_map(ctx->radeon); unsigned item_width, item_mask; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 54b66cc9e35..036468e3a31 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -41,9 +41,6 @@ struct radeon { unsigned family; enum chip_class chip_class; struct r600_tiling_info tiling_info; - unsigned num_tile_pipes; - unsigned backend_map; - boolean backend_map_valid; }; /* these flags are used in register flags and added into block flags */ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 3be6e34f6f0..1f3bd6dd7bd 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -58,7 +58,15 @@ #endif #ifndef RADEON_INFO_NUM_BACKENDS -#define RADEON_INFO_NUM_BACKENDS 10 +#define RADEON_INFO_NUM_BACKENDS 0xa +#endif + +#ifndef RADEON_INFO_NUM_TILE_PIPES +#define RADEON_INFO_NUM_TILE_PIPES 0xb +#endif + +#ifndef RADEON_INFO_BACKEND_MAP +#define RADEON_INFO_BACKEND_MAP 0xd #endif /* Enable/disable feature access for one command stream. 
@@ -240,6 +248,15 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL, &ws->info.r600_tiling_config); + + if (ws->info.drm_minor >= 11) { + radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, + &ws->info.r600_num_tile_pipes); + + if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, + &ws->info.r600_backend_map)) + ws->info.r600_backend_map_valid = TRUE; + } } return TRUE; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 6360d6a8401..dcb3f587a6e 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -85,6 +85,9 @@ struct radeon_info { uint32_t r600_num_backends; uint32_t r600_clock_crystal_freq; uint32_t r600_tiling_config; + uint32_t r600_num_tile_pipes; + uint32_t r600_backend_map; + boolean r600_backend_map_valid; }; enum radeon_feature_id { From ecfcf25387284f01131eeaf9ec3f72bc481f3cfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 03:07:42 +0200 Subject: [PATCH 326/600] r600g: get winsys_handle using radeon_winsys Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_bo.c | 15 +-------------- src/gallium/winsys/r600/drm/r600_priv.h | 3 --- src/gallium/winsys/r600/drm/radeon_bo.c | 16 ---------------- 3 files changed, 1 insertion(+), 33 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 0e41a1709ee..9e2c08ab56f 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -160,18 +160,5 @@ void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *bo, unsigned stride, struct winsys_handle *whandle) { - whandle->stride = stride; - switch(whandle->type) { - case DRM_API_HANDLE_TYPE_KMS: - whandle->handle = bo->bo->handle; - break; 
- case DRM_API_HANDLE_TYPE_SHARED: - if (radeon_bo_get_name(radeon, bo->bo, &whandle->handle)) - return FALSE; - break; - default: - return FALSE; - } - - return TRUE; + return radeon->ws->buffer_get_handle(bo->bo->buf, stride, whandle); } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 036468e3a31..0efb02aadae 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -98,9 +98,6 @@ int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32 int radeon_bo_get_tiling_flags(struct radeon *radeon, struct radeon_bo *bo, uint32_t *tiling_flags); -int radeon_bo_get_name(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *name); int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo); /* diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 536bbe45bff..9d351480d59 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -165,19 +165,3 @@ int radeon_bo_get_tiling_flags(struct radeon *radeon, *tiling_flags = args.tiling_flags; return ret; } - -int radeon_bo_get_name(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *name) -{ - struct drm_gem_flink flink; - int ret; - - flink.handle = bo->handle; - ret = drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_FLINK, &flink); - if (ret) - return ret; - - *name = flink.name; - return ret; -} From 7ee65800c36a5ee3f6b8ec4ae2d4f15f91d4661b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 03:19:33 +0200 Subject: [PATCH 327/600] r600g: get tiling flags using radeon_winsys Also remove some unused fence-related leftovers. 
Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_bo.c | 18 +++++++++--------- src/gallium/winsys/r600/drm/r600_priv.h | 18 ------------------ src/gallium/winsys/r600/drm/radeon_bo.c | 17 ----------------- 3 files changed, 9 insertions(+), 44 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 9e2c08ab56f..0b2a9aabac3 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -79,7 +79,6 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan { struct r600_bo *bo = calloc(1, sizeof(struct r600_bo)); struct radeon_bo *rbo; - unsigned tiling_flags; rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0, 0); if (rbo == NULL) { @@ -93,16 +92,17 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan if (stride) *stride = whandle->stride; - radeon_bo_get_tiling_flags(radeon, rbo, &tiling_flags); if (array_mode) { - if (tiling_flags) { - if (tiling_flags & RADEON_TILING_MACRO) - *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; - else if (tiling_flags & RADEON_TILING_MICRO) - *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; - } else { + enum radeon_bo_layout micro, macro; + + radeon->ws->buffer_get_tiling(rbo->buf, &micro, &macro); + + if (macro == RADEON_LAYOUT_TILED) + *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; + else if (micro == RADEON_LAYOUT_TILED) + *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; + else *array_mode = 0; - } } return bo; } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 0efb02aadae..952f91df89a 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -94,16 +94,11 @@ void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst, struct radeon_bo *src); int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain); 
-int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo); -int radeon_bo_get_tiling_flags(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *tiling_flags); int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo); /* * r600_hw_context.c */ -int r600_context_init_fence(struct r600_context *ctx); void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, unsigned flush_mask, struct r600_bo *rbo); struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset); @@ -161,17 +156,4 @@ static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo) assert(bo->map_count >= 0); } -/* - * fence - */ -static inline boolean fence_is_after(unsigned fence, unsigned ofence) -{ - /* handle wrap around */ - if (fence < 0x80000000 && ofence > 0x80000000) - return TRUE; - if (fence > ofence) - return TRUE; - return FALSE; -} - #endif diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 9d351480d59..f2f53a14a58 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -148,20 +148,3 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain *domain = args.domain; return ret; } - -int radeon_bo_get_tiling_flags(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *tiling_flags) -{ - struct drm_radeon_gem_get_tiling args = {}; - int ret; - - args.handle = bo->handle; - ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_GET_TILING, - &args, sizeof(args)); - if (ret) - return ret; - - *tiling_flags = args.tiling_flags; - return ret; -} From e2e1dc9e66ff348caa97b7b35e558d75c6cc6899 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 03:38:20 +0200 Subject: [PATCH 328/600] r600g: set the flush callback in radeon_winsys I have also renamed the winsys function. 
Reviewed-by: Alex Deucher --- src/gallium/drivers/r300/r300_context.c | 2 +- src/gallium/drivers/r600/r600.h | 2 +- src/gallium/drivers/r600/r600_pipe.c | 21 +++++++++++++--- .../winsys/r600/drm/evergreen_hw_context.c | 2 +- src/gallium/winsys/r600/drm/r600_hw_context.c | 25 ++++++++++--------- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 2 +- src/gallium/winsys/radeon/drm/radeon_winsys.h | 6 ++--- 7 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 5c222588e47..b304999d424 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -457,7 +457,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_render_functions(r300); r300_init_states(&r300->context); - rws->cs_set_flush(r300->cs, r300_flush_callback, r300); + rws->cs_set_flush_callback(r300->cs, r300_flush_callback, r300); /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. 
In order to calm down the CS checker, we bind this diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 84cfa2a17e6..21b42736c6a 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -278,7 +278,7 @@ void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r6 void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); -void r600_context_flush(struct r600_context *ctx); +void r600_context_flush(struct r600_context *ctx, unsigned flags); void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw); struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 8a18207d1ea..e3e31982acd 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -114,8 +114,10 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) return fence; } + static void r600_flush(struct pipe_context *ctx, - struct pipe_fence_handle **fence) + struct pipe_fence_handle **fence, + unsigned flags) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_fence **rfence = (struct r600_fence**)fence; @@ -123,7 +125,18 @@ static void r600_flush(struct pipe_context *ctx, if (rfence) *rfence = r600_create_fence(rctx); - r600_context_flush(&rctx->ctx); + r600_context_flush(&rctx->ctx, flags); +} + +static void r600_flush_from_st(struct pipe_context *ctx, + struct pipe_fence_handle **fence) +{ + r600_flush(ctx, fence, 0); +} + +static void r600_flush_from_winsys(void *ctx, unsigned flags) +{ + r600_flush((struct 
pipe_context*)ctx, NULL, flags); } static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) @@ -194,7 +207,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->context.screen = screen; rctx->context.priv = priv; rctx->context.destroy = r600_destroy_context; - rctx->context.flush = r600_flush; + rctx->context.flush = r600_flush_from_st; /* Easy accessing of screen/winsys. */ rctx->screen = rscreen; @@ -244,6 +257,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } + rctx->screen->ws->cs_set_flush_callback(rctx->ctx.cs, r600_flush_from_winsys, rctx); + util_slab_create(&rctx->pool_transfers, sizeof(struct pipe_transfer), 64, UTIL_SLAB_SINGLETHREADED); diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 29da7bea4c6..1d582ceeaa5 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -1158,7 +1158,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, 0); } /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index c72e8548de8..760bfc591c0 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -89,7 +89,7 @@ void r600_get_backend_mask(struct r600_context *ctx) ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer); /* execute */ - r600_context_flush(ctx); + r600_context_flush(ctx, 0); /* analyze results */ results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_READ, NULL); @@ -940,7 +940,7 @@ void 
r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, 0); } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); @@ -1441,7 +1441,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, 0); } /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { @@ -1485,7 +1485,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) ctx->pm4_dirty_cdwords = 0; } -void r600_context_flush(struct r600_context *ctx) +void r600_context_flush(struct r600_context *ctx, unsigned flags) { struct r600_block *enable_block = NULL; @@ -1506,7 +1506,8 @@ void r600_context_flush(struct r600_context *ctx) /* Flush the CS. */ ctx->cs->cdw = ctx->pm4_cdwords; - ctx->radeon->ws->cs_flush(ctx->cs, 0); + ctx->radeon->ws->cs_flush(ctx->cs, flags); + /* We need to get the pointer to the other CS, * the command streams are double-buffered. 
*/ ctx->pm4 = ctx->cs->buf; @@ -1553,7 +1554,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, 0); } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); @@ -1615,7 +1616,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, 0); } if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) { @@ -1626,7 +1627,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) query->queries_emitted = 1; } else { if (++query->queries_emitted > query->buffer_size / query->result_size / 2) - r600_context_flush(ctx); + r600_context_flush(ctx, 0); } } @@ -1637,7 +1638,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) /* collect current results if query buffer is full */ if (new_results_end == query->results_start) { if (!(query->state & R600_QUERY_STATE_FLUSHED)) - r600_context_flush(ctx); + r600_context_flush(ctx, 0); r600_query_result(ctx, query, TRUE); } @@ -1718,7 +1719,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, { if (operation == PREDICATION_OP_CLEAR) { if (ctx->pm4_cdwords + 3 > ctx->pm4_ndwords) - r600_context_flush(ctx); + r600_context_flush(ctx, 0); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; @@ -1734,7 +1735,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, count /= query->result_size; if (ctx->pm4_cdwords + 5 * count > ctx->pm4_ndwords) - r600_context_flush(ctx); + r600_context_flush(ctx, 0); op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | (flag_wait ? 
PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); @@ -1807,7 +1808,7 @@ boolean r600_context_query_result(struct r600_context *ctx, uint64_t *result = (uint64_t*)vresult; if (!(query->state & R600_QUERY_STATE_FLUSHED)) { - r600_context_flush(ctx); + r600_context_flush(ctx, 0); } if (!r600_query_result(ctx, query, wait)) return FALSE; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 6da0ae67743..1ec324e5b74 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -477,6 +477,6 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_validate = radeon_drm_cs_validate; ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; ws->base.cs_flush = radeon_drm_cs_flush; - ws->base.cs_set_flush = radeon_drm_cs_set_flush; + ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index dcb3f587a6e..47274055207 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -306,9 +306,9 @@ struct radeon_winsys { * \param flush A flush callback function associated with the command stream. * \param user A user pointer that will be passed to the flush callback. */ - void (*cs_set_flush)(struct radeon_winsys_cs *cs, - void (*flush)(void *ctx, unsigned flags), - void *ctx); + void (*cs_set_flush_callback)(struct radeon_winsys_cs *cs, + void (*flush)(void *ctx, unsigned flags), + void *ctx); /** * Return TRUE if a buffer is referenced by a command stream. 
From ab630b5768b0bfa4d7729d110ce4fb8f42e0cfb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 04:27:48 +0200 Subject: [PATCH 329/600] r600g: use buffer_map/unmap from radeon_winsys This also drops the unneeded bo_busy/wait functions. Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 3 +- src/gallium/drivers/r600/r600_asm.c | 2 +- src/gallium/drivers/r600/r600_buffer.c | 7 +- src/gallium/drivers/r600/r600_pipe.c | 4 +- src/gallium/drivers/r600/r600_shader.c | 2 +- src/gallium/drivers/r600/r600_texture.c | 5 +- src/gallium/winsys/r600/drm/r600_bo.c | 40 +---------- src/gallium/winsys/r600/drm/r600_hw_context.c | 10 +-- src/gallium/winsys/r600/drm/r600_priv.h | 27 +------- src/gallium/winsys/r600/drm/radeon_bo.c | 69 ------------------- 10 files changed, 23 insertions(+), 146 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 21b42736c6a..0c70fe2bb0a 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -95,13 +95,14 @@ unsigned r600_get_backend_map(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; +struct radeon_winsys_cs; struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned binding, unsigned usage); struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle, unsigned *stride, unsigned *array_mode); -void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); +void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo, unsigned stride, struct winsys_handle *whandle); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 24af9917a6f..f5244a723f4 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ 
b/src/gallium/drivers/r600/r600_asm.c @@ -2231,7 +2231,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru return -ENOMEM; } - bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); + bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, rctx->ctx.cs, PIPE_TRANSFER_WRITE); if (bytecode == NULL) { r600_bc_clear(&bc); r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 28d8c6af1cb..b8c6a419748 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -85,7 +85,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, if (rbuffer->r.b.user_ptr) return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x; - data = r600_bo_map(rctx->screen->radeon, rbuffer->r.bo, transfer->usage, pipe); + data = r600_bo_map(rctx->screen->radeon, rbuffer->r.bo, rctx->ctx.cs, transfer->usage); if (!data) return NULL; @@ -134,9 +134,8 @@ static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, assert(rbuffer->r.b.user_ptr == NULL); - map = r600_bo_map(radeon, rbuffer->r.bo, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage, - pipe); + map = r600_bo_map(radeon, rbuffer->r.bo, rctx->ctx.cs, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage); memcpy(map + box->x, data, box->width); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index e3e31982acd..1072ea0744d 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -54,6 +54,7 @@ */ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_fence *fence = NULL; if (!ctx->fences.bo) { @@ -63,7 +64,8 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) R600_ERR("r600: failed to create bo for fence objects\n"); return NULL; } 
- ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL); + ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, rctx->ctx.cs, + PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_WRITE); } if (!LIST_IS_EMPTY(&ctx->fences.pool)) { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 2551aa26f2a..0f226ebd52a 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -86,7 +86,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s if (shader->bo == NULL) { return -ENOMEM; } - ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL); + ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, rctx->ctx.cs, PIPE_TRANSFER_WRITE); if (R600_BIG_ENDIAN) { for (i = 0; i < rshader->bc.ndw; ++i) { ptr[i] = bswap_32(rshader->bc.bytecode[i]); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 386d8f35015..f9f0d702008 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -682,10 +682,11 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, void* r600_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct r600_bo *bo; enum pipe_format format = transfer->resource->format; - struct radeon *radeon = ((struct r600_screen*)ctx->screen)->radeon; + struct radeon *radeon = rctx->screen->radeon; unsigned offset = 0; char *map; @@ -704,7 +705,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } - if (!(map = r600_bo_map(radeon, bo, transfer->usage, ctx))) { + if (!(map = r600_bo_map(radeon, bo, rctx->ctx.cs, transfer->usage))) { return NULL; } diff --git 
a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 0b2a9aabac3..184efcc0e9a 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -107,48 +107,14 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan return bo; } -void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx) +void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage) { - struct pipe_context *pctx = ctx; - - if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { - radeon_bo_map(radeon, bo->bo); - return (uint8_t *) bo->bo->data; - } - - if (p_atomic_read(&bo->bo->reference.count) > 1) { - if (usage & PIPE_TRANSFER_DONTBLOCK) { - return NULL; - } - if (ctx) { - pctx->flush(pctx, NULL); - } - } - - if (usage & PIPE_TRANSFER_DONTBLOCK) { - uint32_t domain; - - if (radeon_bo_busy(radeon, bo->bo, &domain)) - return NULL; - if (radeon_bo_map(radeon, bo->bo)) { - return NULL; - } - goto out; - } - - radeon_bo_map(radeon, bo->bo); - if (radeon_bo_wait(radeon, bo->bo)) { - radeon_bo_unmap(radeon, bo->bo); - return NULL; - } - -out: - return (uint8_t *) bo->bo->data; + return radeon->ws->buffer_map(bo->bo->buf, cs, usage); } void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo) { - radeon_bo_unmap(radeon, bo->bo); + radeon->ws->buffer_unmap(bo->bo->buf); } void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 760bfc591c0..0ebb5ed0385 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -74,7 +74,7 @@ void r600_get_backend_mask(struct r600_context *ctx) goto err; /* initialize buffer with zeroes */ - results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_WRITE, NULL); + results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_WRITE); if 
(results) { memset(results, 0, ctx->max_db * 4 * 4); r600_bo_unmap(ctx->radeon, buffer); @@ -92,7 +92,7 @@ void r600_get_backend_mask(struct r600_context *ctx) r600_context_flush(ctx, 0); /* analyze results */ - results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_READ, NULL); + results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_READ); if (results) { for(i = 0; i < ctx->max_db; i++) { /* at least highest bit will be set if backend is used */ @@ -1576,9 +1576,9 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu u32 *results, *current_result; if (wait) - results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_READ, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_READ); else - results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ); if (!results) return FALSE; @@ -1646,7 +1646,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) u32 *results; int i; - results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_WRITE, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_WRITE); if (results) { results = (u32*)((char*)results + query->results_end); memset(results, 0, query->result_size); diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 952f91df89a..90860f4e31b 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -66,8 +66,6 @@ struct radeon_bo { struct radeon_winsys_cs_handle *cs_buf; unsigned handle; unsigned size; - int map_count; - void *data; unsigned last_flush; unsigned binding; @@ -92,9 +90,6 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, unsigned size, unsigned alignment, unsigned bind, unsigned initial_domain); void 
radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst, struct radeon_bo *src); -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); -int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain); -int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo); /* * r600_hw_context.c @@ -123,8 +118,8 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6 assert(bo != NULL); - reloc_index = - ctx->radeon->ws->cs_add_reloc(ctx->cs, bo->cs_buf, rbo->domains, rbo->domains); + reloc_index = ctx->radeon->ws->cs_add_reloc(ctx->cs, bo->cs_buf, + rbo->domains, rbo->domains); if (reloc_index >= ctx->creloc) ctx->creloc = reloc_index+1; @@ -138,22 +133,4 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6 */ void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); - -/* - * radeon_bo.c - */ -static inline int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo) -{ - if (bo->map_count == 0 && !bo->data) - return radeon_bo_fixed_map(radeon, bo); - bo->map_count++; - return 0; -} - -static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo) -{ - bo->map_count--; - assert(bo->map_count >= 0); -} - #endif diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index f2f53a14a58..14f8d340d67 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -32,43 +32,6 @@ #include #include -int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo) -{ - struct drm_radeon_gem_mmap args; - void *ptr; - int r; - - /* Zero out args to make valgrind happy */ - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - args.offset = 0; - args.size = (uint64_t)bo->size; - r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_MMAP, - &args, sizeof(args)); - if (r) { - fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n", - bo, bo->handle, r); - return r; 
- } - ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->info.fd, args.addr_ptr); - if (ptr == MAP_FAILED) { - fprintf(stderr, "%s failed to map bo\n", __func__); - return -errno; - } - bo->data = ptr; - - bo->map_count++; - return 0; -} - -static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo) -{ - if (bo->data) { - munmap(bo->data, bo->size); - bo->data = NULL; - } -} - #include "state_tracker/drm_driver.h" struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, @@ -102,7 +65,6 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo) { - radeon_bo_fixed_unmap(radeon, bo); pb_reference(&bo->buf, NULL); FREE(bo); } @@ -117,34 +79,3 @@ void radeon_bo_reference(struct radeon *radeon, } *dst = src; } - -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) -{ - struct drm_radeon_gem_wait_idle args; - int ret; - - /* Zero out args to make valgrind happy */ - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - do { - ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_WAIT_IDLE, - &args, sizeof(args)); - } while (ret == -EBUSY); - return ret; -} - -int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain) -{ - struct drm_radeon_gem_busy args; - int ret; - - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - args.domain = 0; - - ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_BUSY, - &args, sizeof(args)); - - *domain = args.domain; - return ret; -} From 5229ba494b4b3f19085d13131a37626b914d4014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 05:40:16 +0200 Subject: [PATCH 330/600] r600g: remove radeon_bo::handle This should be private to radeon_winsys. 
Reviewed-by: Alex Deucher --- src/gallium/winsys/r600/drm/r600_hw_context.c | 6 +++--- src/gallium/winsys/r600/drm/r600_priv.h | 1 - src/gallium/winsys/r600/drm/radeon_bo.c | 1 - src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 7 ------- src/gallium/winsys/radeon/drm/radeon_winsys.h | 4 ---- 5 files changed, 3 insertions(+), 16 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 0ebb5ed0385..b2da3eb0458 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1130,11 +1130,11 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ if (!dirty) { if (is_vertex) { - if (block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) + if (block->reloc[1].bo->bo->buf != state->bo[0]->bo->buf) dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } else { - if ((block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) || - (block->reloc[2].bo->bo->handle != state->bo[1]->bo->handle)) + if ((block->reloc[1].bo->bo->buf != state->bo[0]->bo->buf) || + (block->reloc[2].bo->bo->buf != state->bo[1]->bo->buf)) dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 90860f4e31b..1f311c4d5e3 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -64,7 +64,6 @@ struct radeon_bo { struct pipe_reference reference; struct pb_buffer *buf; struct radeon_winsys_cs_handle *cs_buf; - unsigned handle; unsigned size; unsigned last_flush; diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 14f8d340d67..1d3766e55b5 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -58,7 +58,6 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, return NULL; } bo->cs_buf = 
radeon->ws->buffer_get_cs_handle(bo->buf); - bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf); bo->size = size; return bo; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 58898d3423e..609a9065db8 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -617,11 +617,6 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer, return TRUE; } -static unsigned trans_get_buffer_handle(struct pb_buffer *buf) -{ - return get_radeon_bo(buf)->handle; -} - void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) { ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; @@ -634,6 +629,4 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) ws->base.buffer_create = radeon_winsys_bo_create; ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; - - ws->base.trans_get_buffer_handle = trans_get_buffer_handle; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 47274055207..9f70c5c2f9f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -329,10 +329,6 @@ struct radeon_winsys { boolean (*cs_request_feature)(struct radeon_winsys_cs *cs, enum radeon_feature_id fid, boolean enable); - - - /* Transitional functions for r600g when moving to winsys/radeon */ - unsigned (*trans_get_buffer_handle)(struct pb_buffer *buf); }; #endif From c6fec83726d3435a800f0a4e3ded89628b1a504f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 06:11:45 +0200 Subject: [PATCH 331/600] r600g: merge radeon_bo with r600_bo Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 2 +- src/gallium/winsys/r600/drm/Makefile | 1 - src/gallium/winsys/r600/drm/SConscript | 1 - src/gallium/winsys/r600/drm/r600_bo.c | 28 ++++--- 
src/gallium/winsys/r600/drm/r600_hw_context.c | 21 +++-- src/gallium/winsys/r600/drm/r600_priv.h | 34 ++------ src/gallium/winsys/r600/drm/radeon_bo.c | 80 ------------------- 7 files changed, 33 insertions(+), 134 deletions(-) delete mode 100644 src/gallium/winsys/r600/drm/radeon_bo.c diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 0c70fe2bb0a..a8626d1d2ec 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -242,7 +242,7 @@ struct r600_context { unsigned init_dwords; unsigned creloc; - struct radeon_bo **bo; + struct r600_bo **bo; u32 *pm4; unsigned pm4_cdwords; diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile index e5b58d6cf87..5ad183d78ae 100644 --- a/src/gallium/winsys/r600/drm/Makefile +++ b/src/gallium/winsys/r600/drm/Makefile @@ -6,7 +6,6 @@ LIBNAME = r600winsys C_SOURCES = \ evergreen_hw_context.c \ - radeon_bo.c \ radeon_pciid.c \ r600_bo.c \ r600_drm.c \ diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript index 3665b6eaeef..ca51b52ea72 100644 --- a/src/gallium/winsys/r600/drm/SConscript +++ b/src/gallium/winsys/r600/drm/SConscript @@ -4,7 +4,6 @@ env = env.Clone() r600_sources = [ 'evergreen_hw_context.c', - 'radeon_bo.c', 'radeon_pciid.c', 'r600_bo.c', 'r600_drm.c', diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 184efcc0e9a..b40508665b7 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -33,7 +33,7 @@ struct r600_bo *r600_bo(struct radeon *radeon, unsigned binding, unsigned usage) { struct r600_bo *bo; - struct radeon_bo *rbo; + struct pb_buffer *pb; uint32_t initial_domain, domains; /* Staging resources particpate in transfers and blits only @@ -61,14 +61,15 @@ struct r600_bo *r600_bo(struct radeon *radeon, } } - rbo = radeon_bo(radeon, 0, size, alignment, binding, initial_domain); - if (rbo == NULL) { + pb = 
radeon->ws->buffer_create(radeon->ws, size, alignment, binding, initial_domain); + if (!pb) { return NULL; } bo = calloc(1, sizeof(struct r600_bo)); bo->domains = domains; - bo->bo = rbo; + bo->buf = pb; + bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb); pipe_reference_init(&bo->reference, 1); return bo; @@ -77,17 +78,18 @@ struct r600_bo *r600_bo(struct radeon *radeon, struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle, unsigned *stride, unsigned *array_mode) { + struct pb_buffer *pb; struct r600_bo *bo = calloc(1, sizeof(struct r600_bo)); - struct radeon_bo *rbo; - rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0, 0); - if (rbo == NULL) { + pb = bo->buf = radeon->ws->buffer_from_handle(radeon->ws, whandle, stride, NULL); + if (!pb) { free(bo); return NULL; } pipe_reference_init(&bo->reference, 1); bo->domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; + bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb); if (stride) *stride = whandle->stride; @@ -95,7 +97,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan if (array_mode) { enum radeon_bo_layout micro, macro; - radeon->ws->buffer_get_tiling(rbo->buf, &micro, &macro); + radeon->ws->buffer_get_tiling(bo->buf, &micro, &macro); if (macro == RADEON_LAYOUT_TILED) *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; @@ -109,22 +111,22 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage) { - return radeon->ws->buffer_map(bo->bo->buf, cs, usage); + return radeon->ws->buffer_map(bo->buf, cs, usage); } void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo) { - radeon->ws->buffer_unmap(bo->bo->buf); + radeon->ws->buffer_unmap(bo->buf); } void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) { - radeon_bo_reference(radeon, &bo->bo, NULL); + pb_reference(&bo->buf, NULL); free(bo); } boolean
r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *bo, - unsigned stride, struct winsys_handle *whandle) + unsigned stride, struct winsys_handle *whandle) { - return radeon->ws->buffer_get_handle(bo->bo->buf, stride, whandle); + return radeon->ws->buffer_get_handle(bo->buf, stride, whandle); } diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index b2da3eb0458..38713aad1fe 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -951,11 +951,8 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) } void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, - unsigned flush_mask, struct r600_bo *rbo) + unsigned flush_mask, struct r600_bo *bo) { - struct radeon_bo *bo; - - bo = rbo->bo; /* if bo has already been flushed */ if (!(~bo->last_flush & flush_flags)) { bo->last_flush &= flush_mask; @@ -987,11 +984,11 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = flush_flags; - ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8; + ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->base.size + 255) >> 8; ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, rbo); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo); } bo->last_flush = (bo->last_flush | flush_flags) & flush_mask; } @@ -1107,7 +1104,7 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY); if (block->reloc[1].bo) - block->reloc[1].bo->bo->binding &= ~BO_BOUND_TEXTURE; + block->reloc[1].bo->binding 
&= ~BO_BOUND_TEXTURE; r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); @@ -1130,11 +1127,11 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ if (!dirty) { if (is_vertex) { - if (block->reloc[1].bo->bo->buf != state->bo[0]->bo->buf) + if (block->reloc[1].bo->buf != state->bo[0]->buf) dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } else { - if ((block->reloc[1].bo->bo->buf != state->bo[0]->bo->buf) || - (block->reloc[2].bo->bo->buf != state->bo[1]->bo->buf)) + if ((block->reloc[1].bo->buf != state->bo[0]->buf) || + (block->reloc[2].bo->buf != state->bo[1]->buf)) dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } } @@ -1150,7 +1147,7 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ /* TEXTURE RESOURCE */ r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]); - state->bo[0]->bo->binding |= BO_BOUND_TEXTURE; + state->bo[0]->binding |= BO_BOUND_TEXTURE; } if (is_vertex) @@ -1515,7 +1512,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) /* restart */ for (int i = 0; i < ctx->creloc; i++) { ctx->bo[i]->last_flush = 0; - radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL); + r600_bo_reference(ctx->radeon, &ctx->bo[i], NULL); } ctx->creloc = 0; ctx->pm4_dirty_cdwords = 0; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 1f311c4d5e3..82deeb8496e 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -60,21 +60,15 @@ struct r600_reg { }; #define BO_BOUND_TEXTURE 1 -struct radeon_bo { - struct pipe_reference reference; - struct pb_buffer *buf; - struct radeon_winsys_cs_handle *cs_buf; - unsigned size; - - unsigned last_flush; - unsigned binding; -}; struct r600_bo { struct pipe_reference reference; /* this must be the first member for the 
r600_bo_reference inline to work */ /* DO NOT MOVE THIS ^ */ + struct pb_buffer *buf; + struct radeon_winsys_cs_handle *cs_buf; unsigned domains; - struct radeon_bo *bo; + unsigned last_flush; + unsigned binding; }; /* @@ -82,14 +76,6 @@ struct r600_bo { */ unsigned radeon_family_from_device(unsigned device); -/* - * radeon_bo.c - */ -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, unsigned bind, unsigned initial_domain); -void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst, - struct radeon_bo *src); - /* * r600_hw_context.c */ @@ -112,18 +98,14 @@ int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsig static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo) { - struct radeon_bo *bo = rbo->bo; - unsigned reloc_index; - - assert(bo != NULL); - - reloc_index = ctx->radeon->ws->cs_add_reloc(ctx->cs, bo->cs_buf, - rbo->domains, rbo->domains); + unsigned reloc_index = + ctx->radeon->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, + rbo->domains, rbo->domains); if (reloc_index >= ctx->creloc) ctx->creloc = reloc_index+1; - radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo); + r600_bo_reference(ctx->radeon, &ctx->bo[reloc_index], rbo); return reloc_index * 4; } diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c deleted file mode 100644 index 1d3766e55b5..00000000000 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do 
so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#define _FILE_OFFSET_BITS 64 -#include "r600_priv.h" -#include "util/u_hash_table.h" -#include "util/u_memory.h" -#include "radeon_drm.h" -#include "xf86drm.h" -#include -#include - -#include "state_tracker/drm_driver.h" - -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, unsigned bind, - unsigned initial_domain) -{ - struct radeon_bo *bo; - struct winsys_handle whandle = {}; - whandle.handle = handle; - - bo = calloc(1, sizeof(*bo)); - if (bo == NULL) { - return NULL; - } - pipe_reference_init(&bo->reference, 1); - - if (handle) { - bo->buf = radeon->ws->buffer_from_handle(radeon->ws, &whandle, NULL, &size); - } else { - bo->buf = radeon->ws->buffer_create(radeon->ws, size, alignment, bind, initial_domain); - } - if (!bo->buf) { - FREE(bo); - return NULL; - } - bo->cs_buf = radeon->ws->buffer_get_cs_handle(bo->buf); - bo->size = size; - return bo; -} - -static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo) -{ - pb_reference(&bo->buf, NULL); - FREE(bo); -} - -void radeon_bo_reference(struct radeon *radeon, - struct radeon_bo **dst, - struct radeon_bo *src) -{ - struct radeon_bo *old = *dst; - if (pipe_reference(&(*dst)->reference, &src->reference)) { 
- radeon_bo_destroy(radeon, old); - } - *dst = src; -} From 041ed559e11ee99d720c8132428c07d8fe57ec81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 06:17:39 +0200 Subject: [PATCH 332/600] r600g: remove an unused parameter from r600_bo_destroy Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 6 ++--- src/gallium/drivers/r600/r600_asm.c | 2 +- src/gallium/drivers/r600/r600_buffer.c | 4 ++-- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/r600/r600_shader.c | 4 +--- src/gallium/drivers/r600/r600_state_common.c | 4 ++-- src/gallium/drivers/r600/r600_texture.c | 3 +-- src/gallium/winsys/r600/drm/r600_bo.c | 2 +- src/gallium/winsys/r600/drm/r600_hw_context.c | 24 +++++++++---------- src/gallium/winsys/r600/drm/r600_priv.h | 7 +----- 10 files changed, 25 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index a8626d1d2ec..b5d2d74628a 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -107,15 +107,15 @@ void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo, unsigned stride, struct winsys_handle *whandle); -void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); +void r600_bo_destroy(struct r600_bo *bo); /* this relies on the pipe_reference being the first member of r600_bo */ -static INLINE void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src) +static INLINE void r600_bo_reference(struct r600_bo **dst, struct r600_bo *src) { struct r600_bo *old = *dst; if (pipe_reference((struct pipe_reference *)(*dst), (struct pipe_reference *)src)) { - r600_bo_destroy(radeon, old); + r600_bo_destroy(old); } *dst = src; } diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index f5244a723f4..6092432e6f2 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ 
b/src/gallium/drivers/r600/r600_asm.c @@ -2234,7 +2234,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, rctx->ctx.cs, PIPE_TRANSFER_WRITE); if (bytecode == NULL) { r600_bc_clear(&bc); - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + r600_bo_reference(&ve->fetch_shader, NULL); return -ENOMEM; } diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index b8c6a419748..bc70578dc9f 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -46,7 +46,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, struct r600_resource_buffer *rbuffer = r600_buffer(buf); if (rbuffer->r.bo) { - r600_bo_reference(rscreen->radeon, &rbuffer->r.bo, NULL); + r600_bo_reference(&rbuffer->r.bo, NULL); } rbuffer->r.bo = NULL; util_slab_free(&rscreen->pool_buffers, rbuffer); @@ -230,7 +230,7 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, rbuffer = CALLOC_STRUCT(r600_resource); if (rbuffer == NULL) { - r600_bo_reference(rw, &bo, NULL); + r600_bo_reference(&bo, NULL); return NULL; } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 1072ea0744d..4051584f272 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -187,7 +187,7 @@ static void r600_destroy_context(struct pipe_context *context) } r600_bo_unmap(rctx->radeon, rctx->fences.bo); - r600_bo_reference(rctx->radeon, &rctx->fences.bo, NULL); + r600_bo_reference(&rctx->fences.bo, NULL); } r600_update_num_contexts(rctx->screen, -1); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 0f226ebd52a..f86804eadcf 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -154,9 +154,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct 
r600_pipe_shader *s void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - r600_bo_reference(rctx->radeon, &shader->bo, NULL); + r600_bo_reference(&shader->bo, NULL); r600_bc_clear(&shader->shader.bc); memset(&shader->shader,0,sizeof(struct r600_shader)); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 9f3ab89fdf7..2831517fe86 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -150,7 +150,7 @@ void r600_delete_state(struct pipe_context *ctx, void *state) rctx->states[rstate->id] = NULL; } for (int i = 0; i < rstate->nregs; i++) { - r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + r600_bo_reference(&rstate->regs[i].bo, NULL); } free(rstate); } @@ -181,7 +181,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state) if (rctx->vertex_elements == state) rctx->vertex_elements = NULL; - r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL); + r600_bo_reference(&v->fetch_shader, NULL); u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements); FREE(state); } diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index f9f0d702008..ed0b2ec2890 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -339,13 +339,12 @@ static void r600_texture_destroy(struct pipe_screen *screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = ((struct r600_screen*)screen)->radeon; if (rtex->flushed_depth_texture) pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); if (resource->bo) { - r600_bo_reference(radeon, &resource->bo, NULL); + r600_bo_reference(&resource->bo, NULL); } FREE(rtex); } diff --git 
a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index b40508665b7..123f718e664 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -119,7 +119,7 @@ void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo) radeon->ws->buffer_unmap(bo->buf); } -void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) +void r600_bo_destroy(struct r600_bo *bo) { pb_reference(&bo->buf, NULL); free(bo); diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 38713aad1fe..f39fc69aee7 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -103,7 +103,7 @@ void r600_get_backend_mask(struct r600_context *ctx) } } - r600_bo_reference(ctx->radeon, &buffer, NULL); + r600_bo_reference(&buffer, NULL); if (mask != 0) { ctx->backend_mask = mask; @@ -738,7 +738,7 @@ static void r600_free_resource_range(struct r600_context *ctx, struct r600_range block = range->blocks[i]; if (block) { for (int k = 1; k <= block->nbo; k++) - r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); + r600_bo_reference(&block->reloc[k].bo, NULL); free(block); } } @@ -763,7 +763,7 @@ void r600_context_fini(struct r600_context *ctx) range->blocks[CTX_BLOCK_ID(offset)] = NULL; } for (int k = 1; k <= block->nbo; k++) { - r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); + r600_bo_reference(&block->reloc[k].bo, NULL); } free(block); } @@ -1068,7 +1068,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat if (block->pm4_bo_index[id]) { /* find relocation */ reloc_id = block->pm4_bo_index[id]; - r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, reg->bo); + r600_bo_reference(&block->reloc[reloc_id].bo, reg->bo); /* always force dirty for relocs for now */ dirty |= R600_BLOCK_STATUS_DIRTY; } @@ -1106,8 +1106,8 @@ void r600_context_pipe_state_set_resource(struct r600_context 
*ctx, struct r600_ if (block->reloc[1].bo) block->reloc[1].bo->binding &= ~BO_BOUND_TEXTURE; - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); + r600_bo_reference(&block->reloc[1].bo, NULL); + r600_bo_reference(&block->reloc[2].bo, NULL); LIST_DELINIT(&block->list); LIST_DELINIT(&block->enable_list); return; @@ -1141,12 +1141,12 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ /* VERTEX RESOURCE, we preted there is 2 bo to relocate so * we have single case btw VERTEX & TEXTURE resource */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); + r600_bo_reference(&block->reloc[1].bo, state->bo[0]); + r600_bo_reference(&block->reloc[2].bo, NULL); } else { /* TEXTURE RESOURCE */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]); + r600_bo_reference(&block->reloc[1].bo, state->bo[0]); + r600_bo_reference(&block->reloc[2].bo, state->bo[1]); state->bo[0]->binding |= BO_BOUND_TEXTURE; } @@ -1512,7 +1512,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) /* restart */ for (int i = 0; i < ctx->creloc; i++) { ctx->bo[i]->last_flush = 0; - r600_bo_reference(ctx->radeon, &ctx->bo[i], NULL); + r600_bo_reference(&ctx->bo[i], NULL); } ctx->creloc = 0; ctx->pm4_dirty_cdwords = 0; @@ -1793,7 +1793,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) { - r600_bo_reference(ctx->radeon, &query->buffer, NULL); + r600_bo_reference(&query->buffer, NULL); LIST_DELINIT(&query->list); free(query); } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 82deeb8496e..df412a08144 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ 
b/src/gallium/winsys/r600/drm/r600_priv.h @@ -105,13 +105,8 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6 if (reloc_index >= ctx->creloc) ctx->creloc = reloc_index+1; - r600_bo_reference(ctx->radeon, &ctx->bo[reloc_index], rbo); + r600_bo_reference(&ctx->bo[reloc_index], rbo); return reloc_index * 4; } -/* - * r600_bo.c - */ -void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); - #endif From efbccfeca071b052bb8da0a7f0277000869b2ea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 06:19:17 +0200 Subject: [PATCH 333/600] winsys/radeon: remove the device file descriptor from the interface r600g doesn't need it anymore. Reviewed-by: Alex Deucher --- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 1 - src/gallium/winsys/radeon/drm/radeon_winsys.h | 1 - 2 files changed, 2 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 1f3bd6dd7bd..e234321d934 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -316,7 +316,6 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) } ws->fd = fd; - ws->info.fd = fd; if (!do_winsys_init(ws)) goto fail; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 9f70c5c2f9f..bf5b144fe2c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -73,7 +73,6 @@ struct radeon_info { uint32_t pci_id; uint32_t gart_size; uint32_t vram_size; - uint32_t fd; /* XXX transitional */ uint32_t drm_major; /* version */ uint32_t drm_minor; From d6da5185f96c1a85390e08dc2ef36c04d6e0de11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 06:23:59 +0200 Subject: [PATCH 334/600] r600g: don't include radeon_drm.h and xf86drm.h Reviewed-by: Alex Deucher --- 
src/gallium/drivers/r600/r600_buffer.c | 3 --- src/gallium/winsys/r600/drm/evergreen_hw_context.c | 2 -- src/gallium/winsys/r600/drm/r600_bo.c | 13 ++++++------- src/gallium/winsys/r600/drm/r600_drm.c | 2 -- src/gallium/winsys/r600/drm/r600_hw_context.c | 2 -- src/gallium/winsys/r600/drm/r600_priv.h | 1 - 6 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index bc70578dc9f..ca2415adb28 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -33,9 +33,6 @@ #include #include "util/u_upload_mgr.h" -#include -#include "radeon_drm.h" - #include "r600.h" #include "r600_pipe.h" diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 1d582ceeaa5..54e26b3f0e9 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -27,8 +27,6 @@ #include "r600_priv.h" #include "evergreend.h" #include "util/u_memory.h" -#include "radeon_drm.h" -#include "xf86drm.h" #include #define GROUP_FORCE_NEW_BLOCK 0 diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 123f718e664..4beedad233e 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -26,7 +26,6 @@ #include "r600_priv.h" #include "r600d.h" #include "state_tracker/drm_driver.h" -#include "radeon_drm.h" struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, @@ -41,22 +40,22 @@ struct r600_bo *r600_bo(struct radeon *radeon, * resources. We generate them internally for some transfers. 
*/ if (usage == PIPE_USAGE_STAGING) { - domains = RADEON_GEM_DOMAIN_GTT; - initial_domain = RADEON_GEM_DOMAIN_GTT; + domains = RADEON_DOMAIN_GTT; + initial_domain = RADEON_DOMAIN_GTT; } else { - domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; + domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM; switch(usage) { case PIPE_USAGE_DYNAMIC: case PIPE_USAGE_STREAM: case PIPE_USAGE_STAGING: - initial_domain = RADEON_GEM_DOMAIN_GTT; + initial_domain = RADEON_DOMAIN_GTT; break; case PIPE_USAGE_DEFAULT: case PIPE_USAGE_STATIC: case PIPE_USAGE_IMMUTABLE: default: - initial_domain = RADEON_GEM_DOMAIN_VRAM; + initial_domain = RADEON_DOMAIN_VRAM; break; } } @@ -88,7 +87,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan } pipe_reference_init(&bo->reference, 1); - bo->domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; + bo->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM; bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb); if (stride) diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index a1b0ba1fb0f..17d098aed96 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -29,8 +29,6 @@ #include "r600_priv.h" #include "r600_drm_public.h" #include "util/u_memory.h" -#include -#include #include #ifndef RADEON_INFO_NUM_TILE_PIPES diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index f39fc69aee7..577988d37ea 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -26,8 +26,6 @@ #include "r600_priv.h" #include "r600d.h" #include "util/u_memory.h" -#include "radeon_drm.h" -#include "xf86drm.h" #include #define GROUP_FORCE_NEW_BLOCK 0 diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index df412a08144..c5b82fd43ae 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ 
b/src/gallium/winsys/r600/drm/r600_priv.h @@ -30,7 +30,6 @@ #include "../../radeon/drm/radeon_winsys.h" #include "util/u_hash_table.h" #include "os/os_thread.h" -#include "radeon_drm.h" #define PKT_COUNT_C 0xC000FFFF #define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) From 0bbbd82488c11710aaca79ed3db2b605888ac65d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 06:33:04 +0200 Subject: [PATCH 335/600] r600g: undefine RADEON_CTX_MAX_PM4 winsys/radeon has its own definition. Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 2 -- src/gallium/winsys/r600/drm/evergreen_hw_context.c | 6 +++--- src/gallium/winsys/r600/drm/r600_hw_context.c | 6 +++--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index b5d2d74628a..232912f914d 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -29,8 +29,6 @@ #include "util/u_double_list.h" #include "util/u_inlines.h" -#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) - #define R600_ERR(fmt, args...) 
\ fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args) diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 54e26b3f0e9..63b3dc3940d 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -987,12 +987,12 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) ctx->cs = radeon->ws->cs_create(radeon->ws); /* allocate cs variables */ - ctx->bo = calloc(RADEON_CTX_MAX_PM4, sizeof(void *)); + ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *)); if (ctx->bo == NULL) { r = -ENOMEM; goto out_err; } - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS; ctx->pm4 = ctx->cs->buf; r600_init_cs(ctx); @@ -1152,7 +1152,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr /* update the max dword count to make sure we have enough space * reserved for flushing the destination caches */ - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16; + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16; if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 577988d37ea..f89e8d6548d 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -911,12 +911,12 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) ctx->cs = radeon->ws->cs_create(radeon->ws); /* allocate cs variables */ - ctx->bo = calloc(RADEON_CTX_MAX_PM4, sizeof(void *)); + ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *)); if (ctx->bo == NULL) { r = -ENOMEM; goto out_err; } - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS; ctx->pm4 = ctx->cs->buf; 
r600_init_cs(ctx); @@ -1432,7 +1432,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) /* update the max dword count to make sure we have enough space * reserved for flushing the destination caches */ - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16; + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16; if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ From c79e9f0ed59d561849a0a4fbaafe87d5064d3e8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Aug 2011 07:05:07 +0200 Subject: [PATCH 336/600] r600g: enable thread offloading Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600_pipe.c | 5 ++--- src/gallium/drivers/r600/r600_pipe.h | 5 +++++ src/gallium/drivers/r600/r600_texture.c | 4 ++-- src/gallium/winsys/r600/drm/evergreen_hw_context.c | 2 +- src/gallium/winsys/r600/drm/r600_drm.c | 3 --- src/gallium/winsys/r600/drm/r600_hw_context.c | 14 +++++++------- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 4051584f272..5d09d59e111 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -117,9 +117,8 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) } -static void r600_flush(struct pipe_context *ctx, - struct pipe_fence_handle **fence, - unsigned flags) +void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, + unsigned flags) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_fence **rfence = (struct r600_fence**)fence; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 3ca003aa244..7ab785ee2df 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -273,6 +273,11 @@ struct pipe_resource 
*r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); + +/* r600_pipe.c */ +void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, + unsigned flags); + /* r600_query.c */ void r600_init_query_functions(struct r600_pipe_context *rctx); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index ed0b2ec2890..7b5a3e74a26 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -66,7 +66,7 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 rtransfer->staging_texture, 0, &sbox); - ctx->flush(ctx, NULL); + r600_flush(ctx, NULL, RADEON_FLUSH_ASYNC); } unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, @@ -645,7 +645,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, if (usage & PIPE_TRANSFER_READ) { r600_copy_to_staging_texture(ctx, trans); /* Always referenced in the blit. 
*/ - ctx->flush(ctx, NULL); + r600_flush(ctx, NULL, 0); } return &trans->transfer; } diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 63b3dc3940d..eaf461833c7 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -1156,7 +1156,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx, 0); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 17d098aed96..7d5583fd287 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -249,9 +249,6 @@ struct radeon *radeon_create(struct radeon_winsys *ws) if (radeon_drm_get_tiling(radeon)) return NULL; - /* XXX disable ioctl thread offloading until the porting is done. 
*/ - setenv("RADEON_THREAD", "0", 0); - return radeon; } diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index f89e8d6548d..ba8d6c2aa64 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -938,7 +938,7 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx, 0); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); @@ -1436,7 +1436,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx, 0); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { @@ -1549,7 +1549,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx, 0); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); @@ -1611,7 +1611,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx, 0); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) { @@ -1622,7 +1622,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) query->queries_emitted = 1; } else { if (++query->queries_emitted > query->buffer_size / query->result_size / 2) - r600_context_flush(ctx, 0); + r600_context_flush(ctx, 
RADEON_FLUSH_ASYNC); } } @@ -1714,7 +1714,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, { if (operation == PREDICATION_OP_CLEAR) { if (ctx->pm4_cdwords + 3 > ctx->pm4_ndwords) - r600_context_flush(ctx, 0); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; @@ -1730,7 +1730,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, count /= query->result_size; if (ctx->pm4_cdwords + 5 * count > ctx->pm4_ndwords) - r600_context_flush(ctx, 0); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); From 296b8990956fcbd7ce47902d7c108a5973db9397 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 7 Aug 2011 18:42:29 +0200 Subject: [PATCH 337/600] winsys/radeon: remove broken bo-is-busy-for-write guessing Reviewed-by: Alex Deucher --- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 19 ------------------- src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 7 ------- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 5 ----- 3 files changed, 31 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 609a9065db8..1c8a2b8305e 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -99,8 +99,6 @@ static void radeon_bo_wait(struct pb_buffer *_buf) args.handle = bo->handle; while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, &args, sizeof(args)) == -EBUSY); - - bo->busy_for_write = FALSE; } static boolean radeon_bo_is_busy(struct pb_buffer *_buf) @@ -117,8 +115,6 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf) busy = drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args)) != 0; - if (!busy) - bo->busy_for_write = FALSE; 
return busy; } @@ -196,21 +192,6 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, /* XXX We could check whether the buffer is busy for write here. */ radeon_bo_wait((struct pb_buffer*)bo); } -#if 0 - /* XXX This per-winsys busy-for-write tracking sucks. - * What if some other process wrote something, e.g. using - * DRI2CopyRegion? We wouldn't get the busy_for_write flag - * set, skipping bo_wait. - * We need to move the is-busy-for-write query into the kernel. - */ - } else if (bo->busy_for_write) { - /* Update the busy_for_write field (done by radeon_bo_is_busy) - * and wait if needed. */ - if (radeon_bo_is_busy((struct pb_buffer*)bo)) { - radeon_bo_wait((struct pb_buffer*)bo); - } - } -#endif } else { /* Mapping for write. */ if (radeon_bo_is_referenced_by_cs(cs, bo)) { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index f4ea73a2210..047ea6b1cf2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -60,13 +60,6 @@ struct radeon_bo { * thread, is this bo referenced in? */ int num_active_ioctls; - /* Whether the buffer has been relocated for write and is busy since then. - * This field is updated in: - * - radeon_drm_cs_flush (to TRUE if it's relocated for write) - * - radeon_bo_is_busy (to FALSE if it's not busy) - * - radeon_bo_wait (to FALSE) */ - boolean busy_for_write; - boolean flinked; uint32_t flink; }; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 1ec324e5b74..c309354785a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -411,11 +411,6 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. 
*/ p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls); - - /* Update whether the buffer is busy for write. */ - if (cs->csc->relocs[i].write_domain) { - cs->csc->relocs_bo[i]->busy_for_write = TRUE; - } } if (cs->ws->num_cpus > 1 && debug_get_option_thread() && From 1e3c81a068c4ae04cd1c6b18c687d5be69b7b8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 7 Aug 2011 19:04:37 +0200 Subject: [PATCH 338/600] winsys/radeon: hook up the new DRM_RADEON_GEM_WAIT ioctl Reviewed-by: Alex Deucher --- src/gallium/drivers/r300/r300_screen.c | 6 +- src/gallium/drivers/r300/r300_transfer.c | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 67 ++++++++++++++----- src/gallium/winsys/radeon/drm/radeon_winsys.h | 14 +++- 4 files changed, 65 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 674bd24953c..13d25ba7dba 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -454,7 +454,7 @@ static boolean r300_fence_signalled(struct pipe_screen *screen, struct radeon_winsys *rws = r300_screen(screen)->rws; struct pb_buffer *rfence = (struct pb_buffer*)fence; - return !rws->buffer_is_busy(rfence); + return !rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE); } static boolean r300_fence_finish(struct pipe_screen *screen, @@ -471,7 +471,7 @@ static boolean r300_fence_finish(struct pipe_screen *screen, timeout /= 1000; /* Wait in a loop. 
*/ - while (rws->buffer_is_busy(rfence)) { + while (rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) { if (os_time_get() - start_time >= timeout) { return FALSE; } @@ -480,7 +480,7 @@ static boolean r300_fence_finish(struct pipe_screen *screen, return TRUE; } - rws->buffer_wait(rfence); + rws->buffer_wait(rfence, RADEON_USAGE_READWRITE); return TRUE; } diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index e2ea4cbf6c5..65964020adc 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -97,7 +97,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, referenced_hw = TRUE; } else { referenced_hw = - r300->rws->buffer_is_busy(tex->buf); + r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE); } blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 1c8a2b8305e..5c91ec48942 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -43,6 +43,21 @@ #define RADEON_BO_FLAGS_MICRO_TILE 2 #define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20 +#ifndef DRM_RADEON_GEM_WAIT +#define DRM_RADEON_GEM_WAIT 0x2b + +#define RADEON_GEM_NO_WAIT 0x1 +#define RADEON_GEM_USAGE_READ 0x2 +#define RADEON_GEM_USAGE_WRITE 0x4 + +struct drm_radeon_gem_wait { + uint32_t handle; + uint32_t flags; /* one of RADEON_GEM_* */ +}; + +#endif + + extern const struct pb_vtbl radeon_bo_vtbl; @@ -87,35 +102,49 @@ static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf) return bo; } -static void radeon_bo_wait(struct pb_buffer *_buf) +static void radeon_bo_wait(struct pb_buffer *_buf, enum radeon_bo_usage usage) { struct radeon_bo *bo = get_radeon_bo(_buf); - struct drm_radeon_gem_wait_idle args = {}; while (p_atomic_read(&bo->num_active_ioctls)) { sched_yield(); } - args.handle = bo->handle; - while (drmCommandWriteRead(bo->rws->fd, 
DRM_RADEON_GEM_WAIT_IDLE, - &args, sizeof(args)) == -EBUSY); + if (bo->rws->info.drm_minor >= 12) { + struct drm_radeon_gem_wait args = {}; + args.handle = bo->handle; + args.flags = usage; + while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT, + &args, sizeof(args)) == -EBUSY); + } else { + struct drm_radeon_gem_wait_idle args = {}; + args.handle = bo->handle; + while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, + &args, sizeof(args)) == -EBUSY); + } } -static boolean radeon_bo_is_busy(struct pb_buffer *_buf) +static boolean radeon_bo_is_busy(struct pb_buffer *_buf, + enum radeon_bo_usage usage) { struct radeon_bo *bo = get_radeon_bo(_buf); - struct drm_radeon_gem_busy args = {}; - boolean busy; if (p_atomic_read(&bo->num_active_ioctls)) { return TRUE; } - args.handle = bo->handle; - busy = drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, - &args, sizeof(args)) != 0; - - return busy; + if (bo->rws->info.drm_minor >= 12) { + struct drm_radeon_gem_wait args = {}; + args.handle = bo->handle; + args.flags = usage | RADEON_GEM_NO_WAIT; + return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT, + &args, sizeof(args)) != 0; + } else { + struct drm_radeon_gem_busy args = {}; + args.handle = bo->handle; + return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, + &args, sizeof(args)) != 0; + } } static void radeon_bo_destroy(struct pb_buffer *_buf) @@ -173,7 +202,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, return NULL; } - if (radeon_bo_is_busy((struct pb_buffer*)bo)) { + if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) { return NULL; } } else { @@ -187,10 +216,12 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, * Only check whether the buffer is being used for write. 
*/ if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { cs->flush_cs(cs->flush_data, 0); - radeon_bo_wait((struct pb_buffer*)bo); + radeon_bo_wait((struct pb_buffer*)bo, + RADEON_USAGE_READWRITE); } else { /* XXX We could check whether the buffer is busy for write here. */ - radeon_bo_wait((struct pb_buffer*)bo); + radeon_bo_wait((struct pb_buffer*)bo, + RADEON_USAGE_READWRITE); } } else { /* Mapping for write. */ @@ -202,7 +233,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, radeon_drm_cs_sync_flush(cs); } - radeon_bo_wait((struct pb_buffer*)bo); + radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE); } } } @@ -338,7 +369,7 @@ static boolean radeon_bomgr_is_buffer_busy(struct pb_manager *_mgr, return TRUE; } - if (radeon_bo_is_busy((struct pb_buffer*)bo)) { + if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) { return TRUE; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index bf5b144fe2c..90583e3ab8c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -61,6 +61,12 @@ enum radeon_bo_domain { /* bitfield */ RADEON_DOMAIN_VRAM = 4 }; +enum radeon_bo_usage { /* bitfield */ + RADEON_USAGE_READ = 2, + RADEON_USAGE_WRITE = 4, + RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE +}; + struct winsys_handle; struct radeon_winsys_cs_handle; /* for write_reloc etc. */ @@ -162,8 +168,10 @@ struct radeon_winsys { * Return TRUE if a buffer object is being used by the GPU. * * \param buf A winsys buffer object. + * \param usage Only check whether the buffer is busy for the given usage. */ - boolean (*buffer_is_busy)(struct pb_buffer *buf); + boolean (*buffer_is_busy)(struct pb_buffer *buf, + enum radeon_bo_usage usage); /** * Wait for a buffer object until it is not used by a GPU. This is @@ -171,8 +179,10 @@ struct radeon_winsys { * and synchronizing to the fence. 
* * \param buf A winsys buffer object to wait for. + * \param usage Only wait until the buffer is idle for the given usage, + * but may still be busy for some other usage. */ - void (*buffer_wait)(struct pb_buffer *buf); + void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage); /** * Return tiling flags describing a memory layout of a buffer object. From ebfcc58b93cc08c534857c2314694e35b29690ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 7 Aug 2011 19:18:16 +0200 Subject: [PATCH 339/600] winsys/radeon: take advantage of the new ioctl Reviewed-by: Alex Deucher --- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 5c91ec48942..adfbefd897b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -197,13 +197,33 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, if (!(flags & PB_USAGE_UNSYNCHRONIZED)) { /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */ if (flags & PB_USAGE_DONTBLOCK) { - if (radeon_bo_is_referenced_by_cs(cs, bo)) { - cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); - return NULL; - } + if (!(flags & PB_USAGE_CPU_WRITE)) { + /* Mapping for read. + * + * Since we are mapping for read, we don't need to wait + * if the GPU is using the buffer for read too + * (neither one is changing it). + * + * Only check whether the buffer is being used for write. 
*/ + if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { + cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); + return NULL; + } - if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) { - return NULL; + if (radeon_bo_is_busy((struct pb_buffer*)bo, + RADEON_USAGE_WRITE)) { + return NULL; + } + } else { + if (radeon_bo_is_referenced_by_cs(cs, bo)) { + cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); + return NULL; + } + + if (radeon_bo_is_busy((struct pb_buffer*)bo, + RADEON_USAGE_READWRITE)) { + return NULL; + } } } else { if (!(flags & PB_USAGE_CPU_WRITE)) { @@ -216,13 +236,9 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, * Only check whether the buffer is being used for write. */ if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { cs->flush_cs(cs->flush_data, 0); - radeon_bo_wait((struct pb_buffer*)bo, - RADEON_USAGE_READWRITE); - } else { - /* XXX We could check whether the buffer is busy for write here. */ - radeon_bo_wait((struct pb_buffer*)bo, - RADEON_USAGE_READWRITE); } + radeon_bo_wait((struct pb_buffer*)bo, + RADEON_USAGE_WRITE); } else { /* Mapping for write. */ if (radeon_bo_is_referenced_by_cs(cs, bo)) { From 47dcfb8dab517e2c92af2f4813b0f5ad200b8b07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 7 Aug 2011 21:14:38 +0200 Subject: [PATCH 340/600] r600g: set read/write usage flags for each relocation This takes advantage of the new GEM_WAIT ioctl when mapping buffers. 
Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/evergreen_state.c | 560 +++++++++--------- src/gallium/drivers/r600/r600.h | 20 +- src/gallium/drivers/r600/r600_pipe.h | 6 +- src/gallium/drivers/r600/r600_state.c | 370 ++++++------ src/gallium/drivers/r600/r600_state_common.c | 52 +- .../winsys/r600/drm/evergreen_hw_context.c | 2 +- src/gallium/winsys/r600/drm/r600_hw_context.c | 45 +- src/gallium/winsys/r600/drm/r600_priv.h | 10 +- 8 files changed, 550 insertions(+), 515 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index c9eaf94a2ae..f82e20306d1 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -638,10 +638,10 @@ static void evergreen_set_blend_color(struct pipe_context *ctx, return; rstate->id = R600_PIPE_STATE_BLEND_COLOR; - r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; @@ -686,13 +686,13 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, blend->cb_target_mask = target_mask; r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFD, NULL); 
+ color_control, 0xFFFFFFFD, NULL, 0); if (rctx->chip_class != CAYMAN) - r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); else { - r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); } for (int i = 0; i < 8; i++) { @@ -723,7 +723,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } } for (int i = 0; i < 8; i++) { - r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL, 0); } return rstate; @@ -791,27 +791,27 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); /* TODO db_render_override depends on query */ - r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, stencil_ref_mask, - 0xFFFFFFFF & C_028430_STENCILREF, NULL); 
+ 0xFFFFFFFF & C_028430_STENCILREF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, - 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); - r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL, 0); /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by * evergreen_pipe_shader_ps().*/ - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); - r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, 
R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -856,7 +856,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); } } - r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL, 0); polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL); @@ -870,44 +870,44 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | S_028814_POLY_MODE(polygon_dual_mode) | S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | - S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); /* point size 12.4 fixed point */ tmp = (unsigned)(state->point_size * 8.0); - r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 
0xFFFFFFFF, NULL, 0); tmp = (unsigned)state->line_width * 8; - r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); if (rctx->chip_class == CAYMAN) { - r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); } else { - r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, 
R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } - r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -933,22 +933,22 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) | S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | (state->seamless_cube_map ? 
0 : S_03C008_DISABLE_CUBE_WRAP(1)) | S_03C008_TYPE(1), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); if (uc.ui) { - r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL, 0); } return rstate; } @@ -1016,6 +1016,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->bo[0] = bo[0]; rstate->bo[1] = bo[1]; + rstate->bo_usage[0] = RADEON_USAGE_READ; + rstate->bo_usage[1] = RADEON_USAGE_READ; rstate->val[0] = (S_030000_DIM(r600_tex_dim(texture->target)) | S_030000_PITCH((pitch / 8) - 1) | S_030000_NON_DISP_TILING_ORDER(tile_type) | @@ -1131,21 +1133,21 @@ static void evergreen_set_clip_state(struct pipe_context *ctx, for (int i = 0; i < state->nr; i++) { r600_pipe_state_add_reg(rstate, R_0285BC_PA_CL_UCP0_X + i * 16, - fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0285C0_PA_CL_UCP0_Y + i * 16, - fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL, 0); 
r600_pipe_state_add_reg(rstate, R_0285C4_PA_CL_UCP0_Z + i * 16, - fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0285C8_PA_CL_UCP0_W + i * 16, - fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL, 0); } r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | - S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_CLIP]); rctx->states[R600_PIPE_STATE_CLIP] = rstate; @@ -1176,28 +1178,28 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx, br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -1219,11 +1221,11 @@ static void evergreen_set_stencil_ref(struct pipe_context *ctx, tmp = S_028430_STENCILREF(state->ref_value[0]); r600_pipe_state_add_reg(rstate, 
R_028430_DB_STENCILREFMASK, tmp, - ~C_028430_STENCILREF, NULL); + ~C_028430_STENCILREF, NULL, 0); tmp = S_028434_STENCILREF_BF(state->ref_value[1]); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, tmp, - ~C_028434_STENCILREF_BF, NULL); + ~C_028434_STENCILREF_BF, NULL, 0); free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; @@ -1241,15 +1243,15 @@ static void evergreen_set_viewport_state(struct pipe_context *ctx, rctx->viewport = *state; rstate->id = R600_PIPE_STATE_VIEWPORT; - r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL, 0); + 
r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_VIEWPORT]); rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; @@ -1354,28 +1356,28 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, - offset >> 8, 0xFFFFFFFF, bo[0]); + offset >> 8, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028C78_CB_COLOR0_DIM + cb * 0x3C, - 0x0, 0xFFFFFFFF, NULL); + 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C70_CB_COLOR0_INFO + cb * 0x3C, - color_info, 0xFFFFFFFF, bo[0]); + color_info, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028C64_CB_COLOR0_PITCH + cb * 0x3C, S_028C64_PITCH_TILE_MAX(pitch), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C68_CB_COLOR0_SLICE + cb * 0x3C, S_028C68_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C6C_CB_COLOR0_VIEW + cb * 0x3C, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, S_028C74_NON_DISP_TILING_ORDER(tile_type), - 0xFFFFFFFF, bo[0]); + 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); } static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, @@ -1407,33 +1409,33 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state stencil_format = 
r600_translate_stencilformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, - offset >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, - offset >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); if (stencil_format) { uint32_t stencil_offset; stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255; r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, - (offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, - (offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); } - r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, - S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo); + S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format), - 0xFFFFFFFF, rbuffer->bo); + 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, S_028058_PITCH_TILE_MAX(pitch), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } static void evergreen_set_framebuffer_state(struct pipe_context *ctx, @@ -1492,49 +1494,49 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, 
R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028244_PA_SC_GENERIC_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, - 0x00000000, target_mask, NULL); + 0x00000000, target_mask, NULL, 0); r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, - shader_mask, 0xFFFFFFFF, NULL); + shader_mask, 0xFFFFFFFF, NULL, 0); if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); } else { r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); } free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); @@ -1609,78 +1611,78 @@ static void cayman_init_config(struct r600_pipe_context *rctx) tmp = 0x00000000; tmp |= 
S_008C00_EXPORT_SRC_C(1); - r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0); /* always set the temp clauses */ - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, 
R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, 
R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); - 
r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 
0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL, 0); + 
r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, 0); - r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, NULL, 0); - r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xffffffff, NULL); - r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xffffffff, NULL); + r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xffffffff, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -1964,39 +1966,39 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C00_VS_PRIO(vs_prio); tmp |= S_008C00_GS_PRIO(gs_prio); tmp |= S_008C00_ES_PRIO(es_prio); - r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0); /* enable dynamic GPR resource management */ if (r600_get_minor_version(rctx->radeon) >= 7) { /* always set temp clauses */ 
r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, - S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); + S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, S_028838_PS_GPRS(0x1e) | S_028838_VS_GPRS(0x1e) | S_028838_GS_GPRS(0x1e) | S_028838_ES_GPRS(0x1e) | S_028838_HS_GPRS(0x1e) | - S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ + S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL, 0); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ } else { tmp = 0; tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); tmp |= 
S_008C0C_NUM_HS_GPRS(num_ls_gprs); - r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL, 0); } tmp = 0; @@ -2004,109 +2006,109 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads); tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads); tmp |= S_008C18_NUM_ES_THREADS(num_es_threads); - r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C1C_NUM_HS_THREADS(num_hs_threads); tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads); - r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008E2C_NUM_PS_LDS(0x1000); tmp |= S_008E2C_NUM_LS_LDS(0x1000); - 
r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL, 0); #if 0 - r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL, 0); #endif - r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 
0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, 
NULL); - r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + 
r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 
0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, 
R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, 
R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -2143,19 +2145,19 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); r600_pipe_state_add_reg(&state, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + offset_db_fmt_cntl, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &state); } } @@ -2252,32 +2254,32 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader S_0286E0_LINEAR_CENTROID_ENA(have_centroid); r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, - spi_ps_in_control_0, 0xFFFFFFFF, NULL); + spi_ps_in_control_0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, - spi_ps_in_control_1, 0xFFFFFFFF, NULL); + spi_ps_in_control_1, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2, - 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + 0, 0xFFFFFFFF, NULL, 0); + 
r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - 0, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_028844_SQ_PGM_RESOURCES_PS, S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | S_028844_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028848_SQ_PGM_RESOURCES_2_PS, - 0x0, 0xFFFFFFFF, NULL); + 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02884C_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); + exports_ps, 0xFFFFFFFF, NULL, 0); /* FIXME: Evergreen doesn't seem to support MULTIWRITE_ENABLE. */ /* only set some bits here, the other bits are set in the dsa state */ r600_pipe_state_add_reg(rstate, @@ -2286,10 +2288,10 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader S_02880C_Z_EXPORT_ENABLE(1) | S_02880C_STENCIL_EXPORT_ENABLE(1) | S_02880C_KILL_ENABLE(1), - NULL); + NULL, 0); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -2314,7 +2316,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader for (i = 0; i < 10; i++) { r600_pipe_state_add_reg(rstate, R_02861C_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); + spi_vs_out_id[i], 0xFFFFFFFF, NULL, 0); } /* Certain attributes (position, psize, etc.) don't count as params. 
@@ -2328,22 +2330,22 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(nparams - 1), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028860_SQ_PGM_RESOURCES_VS, S_028860_NUM_GPRS(rshader->bc.ngpr) | S_028860_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028864_SQ_PGM_RESOURCES_2_VS, - 0x0, 0xFFFFFFFF, NULL); + 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02885C_SQ_PGM_START_VS, - 0, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void evergreen_fetch_shader(struct pipe_context *ctx, @@ -2354,10 +2356,10 @@ void evergreen_fetch_shader(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_FETCH_SHADER; rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, 0, - 0xFFFFFFFF, ve->fetch_shader); + 0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ); } void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) @@ -2371,7 +2373,7 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, 0x0, - S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + S_02880C_DUAL_EXPORT_ENABLE(1), NULL, 0); r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, S_028000_DEPTH_COPY_ENABLE(1) | @@ -2379,7 +2381,7 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028000_COPY_CENTROID(1), S_028000_DEPTH_COPY_ENABLE(1) | S_028000_STENCIL_COPY_ENABLE(1) | - S_028000_COPY_CENTROID(1), NULL); + S_028000_COPY_CENTROID(1), NULL, 0); return rstate; } @@ -2405,9 +2407,11 @@ void 
evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride) + unsigned offset, unsigned stride, + enum radeon_bo_usage usage) { rstate->bo[0] = rbuffer->bo; + rstate->bo_usage[0] = usage; rstate->val[0] = offset; rstate->val[1] = rbuffer->bo_size - offset - 1; rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 232912f914d..f24146edcf1 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -26,8 +26,8 @@ #ifndef R600_H #define R600_H +#include "../../winsys/radeon/drm/radeon_winsys.h" #include "util/u_double_list.h" -#include "util/u_inlines.h" #define R600_ERR(fmt, args...) \ fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args) @@ -140,6 +140,7 @@ struct r600_pipe_reg { u32 mask; struct r600_block *block; struct r600_bo *bo; + enum radeon_bo_usage bo_usage; u32 id; }; @@ -152,7 +153,8 @@ struct r600_pipe_state { struct r600_pipe_resource_state { unsigned id; u32 val[8]; - struct r600_bo *bo[2]; + struct r600_bo *bo[2]; + enum radeon_bo_usage bo_usage[2]; /* XXX set these */ }; #define R600_BLOCK_STATUS_ENABLED (1 << 0) @@ -163,6 +165,7 @@ struct r600_pipe_resource_state { struct r600_block_reloc { struct r600_bo *bo; + enum radeon_bo_usage bo_usage; unsigned flush_flags; unsigned flush_mask; unsigned bo_pm4_index; @@ -311,12 +314,15 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx, struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, u32 range_id, u32 block_id, - struct r600_bo *bo); + struct r600_bo *bo, + enum radeon_bo_usage usage); void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, - struct r600_bo *bo); -#define r600_pipe_state_add_reg(state, offset, value, mask, bo) 
_r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo) + struct r600_bo *bo, + enum radeon_bo_usage usage); + +#define r600_pipe_state_add_reg(state, offset, value, mask, bo, usage) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo, usage) static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state, u32 value) @@ -326,10 +332,12 @@ static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state, } static inline void r600_pipe_state_mod_reg_bo(struct r600_pipe_state *state, - u32 value, struct r600_bo *bo) + u32 value, struct r600_bo *bo, + enum radeon_bo_usage usage) { state->regs[state->nregs].value = value; state->regs[state->nregs].bo = bo; + state->regs[state->nregs].bo_usage = usage; state->nregs++; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 7ab785ee2df..2747f54079c 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -250,7 +250,8 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, struct r600_pipe_resource_state *rstate); void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride); + unsigned offset, unsigned stride, + enum radeon_bo_usage usage); boolean evergreen_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, @@ -302,7 +303,8 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, struct r600_pipe_resource_state *rstate); void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride); + unsigned offset, unsigned stride, + enum radeon_bo_usage usage); void r600_adjust_gprs(struct r600_pipe_context *rctx); boolean r600_is_format_supported(struct pipe_screen 
*screen, enum pipe_format format, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 487b1df0052..0757eab2ea7 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -662,19 +662,19 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); r600_pipe_state_add_reg(&state, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + offset_db_fmt_cntl, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &state); } } @@ -689,10 +689,10 @@ static void r600_set_blend_color(struct pipe_context *ctx, return; rstate->id = R600_PIPE_STATE_BLEND_COLOR; - r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, 
fui(state->color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; r600_context_pipe_state_set(&rctx->ctx, rstate); @@ -742,7 +742,7 @@ static void *r600_create_blend_state(struct pipe_context *ctx, blend->cb_target_mask = target_mask; /* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). */ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFD, NULL); + color_control, 0xFFFFFFFD, NULL, 0); for (int i = 0; i < 8; i++) { /* state->rt entries > 0 only written if independent blending */ @@ -773,9 +773,9 @@ static void *r600_create_blend_state(struct pipe_context *ctx, /* R600 does not support per-MRT blends */ if (rctx->family > CHIP_R600) - r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL, 0); if (i == 0) - r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL, 0); } return rstate; } @@ -842,28 +842,28 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); /* TODO db_render_override depends on query */ - r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL, 0); + 
r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, stencil_ref_mask, - 0xFFFFFFFF & C_028430_STENCILREF, NULL); + 0xFFFFFFFF & C_028430_STENCILREF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, - 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); - r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL, 0); /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by * r600_pipe_shader_ps().*/ - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); - r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, 
R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -907,7 +907,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); } } - r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL, 0); polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL); @@ -921,33 +921,33 @@ static void *r600_create_rs_state(struct pipe_context *ctx, S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | S_028814_POLY_MODE(polygon_dual_mode) | S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | - S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 
0); /* point size 12.4 fixed point */ tmp = (unsigned)(state->point_size * 8.0); - r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL, 0); tmp = (unsigned)state->line_width * 8; - r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); - 
r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -977,17 +977,17 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) | S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + S_03C000_BORDER_COLOR_TYPE(uc.ui ? 
V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | - S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL, 0); if (uc.ui) { - r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL, 0); } return rstate; } @@ -1068,6 +1068,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rstate->bo[0] = bo[0]; rstate->bo[1] = bo[1]; + rstate->bo_usage[0] = RADEON_USAGE_READ; + rstate->bo_usage[1] = RADEON_USAGE_READ; rstate->val[0] = 
(S_038000_DIM(r600_tex_dim(texture->target)) | S_038000_TILE_MODE(array_mode) | @@ -1157,7 +1159,7 @@ static void r600_set_seamless_cubemap(struct r600_pipe_context *rctx, boolean en rstate->id = R600_PIPE_STATE_SEAMLESS_CUBEMAP; r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, (enable ? 0 : S_009508_DISABLE_CUBE_WRAP(1)), - 1, NULL); + 1, NULL, 0); free(rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP]); rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP] = rstate; @@ -1215,21 +1217,21 @@ static void r600_set_clip_state(struct pipe_context *ctx, for (int i = 0; i < state->nr; i++) { r600_pipe_state_add_reg(rstate, R_028E20_PA_CL_UCP0_X + i * 16, - fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028E24_PA_CL_UCP0_Y + i * 16, - fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028E28_PA_CL_UCP0_Z + i * 16, - fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028E2C_PA_CL_UCP0_W + i * 16, - fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL, 0); } r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | - S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_CLIP]); rctx->states[R600_PIPE_STATE_CLIP] = rstate; @@ -1260,28 +1262,28 @@ static void r600_set_scissor_state(struct pipe_context *ctx, br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, 
R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -1303,11 +1305,11 @@ static void r600_set_stencil_ref(struct pipe_context *ctx, tmp = S_028430_STENCILREF(state->ref_value[0]); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, tmp, - ~C_028430_STENCILREF, NULL); + ~C_028430_STENCILREF, NULL, 0); tmp = S_028434_STENCILREF_BF(state->ref_value[1]); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, tmp, - ~C_028434_STENCILREF_BF, NULL); + ~C_028434_STENCILREF_BF, NULL, 0); free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; @@ -1325,15 +1327,15 @@ static void r600_set_viewport_state(struct pipe_context *ctx, rctx->viewport = *state; rstate->id = R600_PIPE_STATE_VIEWPORT; - r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, 
R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_VIEWPORT]); rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; @@ -1441,27 +1443,27 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, - offset >> 8, 0xFFFFFFFF, bo[0]); + offset >> 8, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_0280A0_CB_COLOR0_INFO + cb * 4, - color_info, 0xFFFFFFFF, bo[0]); + color_info, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028060_CB_COLOR0_SIZE + cb * 4, S_028060_PITCH_TILE_MAX(pitch) | S_028060_SLICE_TILE_MAX(slice), - 
0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028080_CB_COLOR0_VIEW + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0280E0_CB_COLOR0_FRAG + cb * 4, - 0, 0xFFFFFFFF, bo[1]); + 0, 0xFFFFFFFF, bo[1], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_0280C0_CB_COLOR0_TILE + cb * 4, - 0, 0xFFFFFFFF, bo[2]); + 0, 0xFFFFFFFF, bo[2], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028100_CB_COLOR0_MASK + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); } static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, @@ -1492,16 +1494,16 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, - offset >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO, S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format), - 0xFFFFFFFF, rbuffer->bo); + 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, - (surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL); + (surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL, 0); } static void r600_set_framebuffer_state(struct pipe_context *ctx, @@ -1546,59 +1548,59 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, 
NULL, 0); r600_pipe_state_add_reg(rstate, R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028244_PA_SC_GENERIC_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL, - shader_control, 0xFFFFFFFF, NULL); + shader_control, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, - 0x00000000, target_mask, NULL); + 0x00000000, target_mask, NULL, 0); r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, - shader_mask, 0xFFFFFFFF, NULL); + shader_mask, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C30_CB_CLRCMP_CONTROL, - 0x01000000, 0xFFFFFFFF, NULL); + 0x01000000, 
0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C34_CB_CLRCMP_SRC, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C38_CB_CLRCMP_DST, - 0x000000FF, 0xFFFFFFFF, NULL); + 0x000000FF, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C3C_CB_CLRCMP_MSK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C48_PA_SC_AA_MASK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; @@ -1695,7 +1697,7 @@ void r600_adjust_gprs(struct r600_pipe_context *rctx) tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); rstate.nregs = 0; - r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL); + r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &rstate); } @@ -1866,20 +1868,20 @@ void r600_init_config(struct r600_pipe_context *rctx) tmp |= S_008C00_VS_PRIO(vs_prio); tmp |= S_008C00_GS_PRIO(gs_prio); tmp |= S_008C00_ES_PRIO(es_prio); - r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_GPR_RESOURCE_MGMT_1 */ tmp = 0; tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_GPR_RESOURCE_MGMT_2 */ tmp = 0; tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, 
R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_THREAD_RESOURCE_MGMT */ tmp = 0; @@ -1887,78 +1889,78 @@ void r600_init_config(struct r600_pipe_context *rctx) tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads); tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads); tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads); - r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_STACK_RESOURCE_MGMT_1 */ tmp = 0; tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_STACK_RESOURCE_MGMT_2 */ tmp = 0; tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL, 0); if (rctx->chip_class >= R700) { - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, S_009508_DISABLE_CUBE_ANISO(1) | S_009508_SYNC_GRADIENT(1) | S_009508_SYNC_WALKER(1) | - S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); - 
r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL, 0); } else { - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, S_009508_DISABLE_CUBE_ANISO(1) | S_009508_SYNC_GRADIENT(1) | S_009508_SYNC_WALKER(1) | - S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL); + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL, 0); } - 
r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 
0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL, 0); + 
r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL, 0); + 
r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -2022,38 +2024,38 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shad S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); } - r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - 0, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_028850_SQ_PGM_RESOURCES_PS, S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028854_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); + exports_ps, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288CC_SQ_PGM_CF_OFFSET_PS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all), S_028808_MULTIWRITE_ENABLE(1), - NULL); + NULL, 0); /* only set some bits here, the other bits are set in the dsa state */ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, S_02880C_Z_EXPORT_ENABLE(1) | S_02880C_STENCIL_REF_EXPORT_ENABLE(1) | S_02880C_KILL_ENABLE(1), - NULL); + NULL, 0); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0, 
0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -2081,7 +2083,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad for (i = 0; i < 10; i++) { r600_pipe_state_add_reg(rstate, R_028614_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); + spi_vs_out_id[i], 0xFFFFFFFF, NULL, 0); } /* Certain attributes (position, psize, etc.) don't count as params. @@ -2095,22 +2097,22 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(nparams - 1), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028868_SQ_PGM_RESOURCES_VS, S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288D0_SQ_PGM_CF_OFFSET_VS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028858_SQ_PGM_START_VS, - 0, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void r600_fetch_shader(struct pipe_context *ctx, @@ -2123,12 +2125,12 @@ void r600_fetch_shader(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_FETCH_SHADER; rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, 0, - 0xFFFFFFFF, ve->fetch_shader); + 0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ); } void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) @@ -2157,7 +2159,7 
@@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, 0x0, - S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + S_02880C_DUAL_EXPORT_ENABLE(1), NULL, 0); r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, S_028D0C_DEPTH_COPY_ENABLE(1) | @@ -2165,7 +2167,7 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028D0C_COPY_CENTROID(1), S_028D0C_DEPTH_COPY_ENABLE(1) | S_028D0C_STENCIL_COPY_ENABLE(1) | - S_028D0C_COPY_CENTROID(1), NULL); + S_028D0C_COPY_CENTROID(1), NULL, 0); return rstate; } @@ -2186,10 +2188,12 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride) + unsigned offset, unsigned stride, + enum radeon_bo_usage usage) { rstate->val[0] = offset; rstate->bo[0] = rbuffer->bo; + rstate->bo_usage[0] = usage; rstate->val[1] = rbuffer->bo_size - offset - 1; rstate->val[2] = S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | S_038008_STRIDE(stride); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 2831517fe86..53a1313a2a8 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -336,7 +336,7 @@ static void r600_update_alpha_ref(struct r600_pipe_context *rctx) rstate.nregs = 0; if (rctx->export_16bpc) alpha_ref &= ~0x1FFF; - r600_pipe_state_add_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &rstate); rctx->alpha_ref_dirty = false; @@ -349,7 +349,7 @@ static void r600_spi_block_init(struct r600_pipe_context *rctx, struct r600_pipe rstate->nregs = 0; rstate->id = R600_PIPE_STATE_SPI; for (i = 0; i < 32; i++) { - r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 
+ i * 4, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL, 0); } } @@ -425,10 +425,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo, RADEON_USAGE_READ); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); rstate = &rctx->vs_const_buffer_resource[index]; @@ -441,10 +441,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } else { - r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } break; @@ -453,10 +453,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo, RADEON_USAGE_READ); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); rstate = &rctx->ps_const_buffer_resource[index]; @@ -468,10 +468,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } } if 
(rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } else { - r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } break; @@ -528,10 +528,10 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); + evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { - r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); + r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } } @@ -614,16 +614,16 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) if (rctx->vgt.id != R600_PIPE_STATE_VGT) { rctx->vgt.id = R600_PIPE_STATE_VGT; rctx->vgt.nregs = 0; - r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - 
r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->vgt, R_028814_PA_SU_SC_MODE_CNTL, 0, - S_028814_PROVOKING_VTX_LAST(1), NULL); + S_028814_PROVOKING_VTX_LAST(1), NULL, 0); } @@ -675,11 +675,14 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx, struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, u32 range_id, u32 block_id, - struct r600_bo *bo) + struct r600_bo *bo, + enum radeon_bo_usage usage) { struct r600_range *range; struct r600_block *block; + if (bo) assert(usage); + range = &ctx->range[range_id]; block = range->blocks[block_id]; state->regs[state->nregs].block = block; @@ -688,6 +691,7 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx, state->regs[state->nregs].value = value; state->regs[state->nregs].mask = mask; state->regs[state->nregs].bo = bo; + state->regs[state->nregs].bo_usage = usage; state->nregs++; assert(state->nregs < R600_BLOCK_MAX_REG); @@ -695,13 +699,17 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx, void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, - struct r600_bo *bo) + struct r600_bo *bo, + enum radeon_bo_usage usage) { + if 
(bo) assert(usage); + state->regs[state->nregs].id = offset; state->regs[state->nregs].block = NULL; state->regs[state->nregs].value = value; state->regs[state->nregs].mask = mask; state->regs[state->nregs].bo = bo; + state->regs[state->nregs].bo_usage = usage; state->nregs++; assert(state->nregs < R600_BLOCK_MAX_REG); diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index eaf461833c7..30bb0b8223c 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -1186,7 +1186,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - pm4[10] = r600_context_bo_reloc(ctx, draw->indices); + pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ); } else { pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); pm4[5] = draw->vgt_num_indices; diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index ba8d6c2aa64..6c5b4b8953a 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -84,7 +84,7 @@ void r600_get_backend_mask(struct r600_context *ctx) ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE); /* execute */ r600_context_flush(ctx, 0); @@ -986,7 +986,7 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo); + 
ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE); } bo->last_flush = (bo->last_flush | flush_flags) & flush_mask; } @@ -1067,6 +1067,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat /* find relocation */ reloc_id = block->pm4_bo_index[id]; r600_bo_reference(&block->reloc[reloc_id].bo, reg->bo); + block->reloc[reloc_id].bo_usage = reg->bo_usage; /* always force dirty for relocs for now */ dirty |= R600_BLOCK_STATUS_DIRTY; } @@ -1140,11 +1141,14 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ * we have single case btw VERTEX & TEXTURE resource */ r600_bo_reference(&block->reloc[1].bo, state->bo[0]); + block->reloc[1].bo_usage = state->bo_usage[0]; r600_bo_reference(&block->reloc[2].bo, NULL); } else { /* TEXTURE RESOURCE */ r600_bo_reference(&block->reloc[1].bo, state->bo[0]); + block->reloc[1].bo_usage = state->bo_usage[0]; r600_bo_reference(&block->reloc[2].bo, state->bo[1]); + block->reloc[2].bo_usage = state->bo_usage[1]; state->bo[0]->binding |= BO_BOUND_TEXTURE; } @@ -1279,7 +1283,6 @@ struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block) { - int id; int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); int cp_dwords = block->pm4_ndwords, start_dword = 0; int new_dwords = 0; @@ -1295,13 +1298,13 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block * for (int j = 0; j < block->nreg; j++) { if (block->pm4_bo_index[j]) { /* find relocation */ - id = block->pm4_bo_index[j]; - block->pm4[block->reloc[id].bo_pm4_index] = - r600_context_bo_reloc(ctx, block->reloc[id].bo); + struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; + block->pm4[reloc->bo_pm4_index] = + r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); r600_context_bo_flush(ctx, - block->reloc[id].flush_flags, - 
block->reloc[id].flush_mask, - block->reloc[id].bo); + reloc->flush_flags, + reloc->flush_mask, + reloc->bo); nbo--; if (nbo == 0) break; @@ -1335,7 +1338,6 @@ out: void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block) { - int id; int cp_dwords = block->pm4_ndwords; int nbo = block->nbo; @@ -1349,13 +1351,13 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60 for (int j = 0; j < nbo; j++) { if (block->pm4_bo_index[j]) { /* find relocation */ - id = block->pm4_bo_index[j]; - block->pm4[block->reloc[id].bo_pm4_index] = - r600_context_bo_reloc(ctx, block->reloc[id].bo); + struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; + block->pm4[reloc->bo_pm4_index] = + r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); r600_context_bo_flush(ctx, - block->reloc[id].flush_flags, - block->reloc[id].flush_mask, - block->reloc[id].bo); + reloc->flush_flags, + reloc->flush_mask, + reloc->bo); } } ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; @@ -1466,7 +1468,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - pm4[10] = r600_context_bo_reloc(ctx, draw->indices); + pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ); } else { pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); pm4[5] = draw->vgt_num_indices; @@ -1561,7 +1563,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, ctx->pm4[ctx->pm4_cdwords++] = value; /* DATA_LO */ ctx->pm4[ctx->pm4_cdwords++] = 0; /* DATA_HI */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE); } static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, 
boolean wait) @@ -1672,7 +1674,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); query->state |= R600_QUERY_STATE_STARTED; query->state ^= R600_QUERY_STATE_ENDED; @@ -1696,7 +1698,7 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); query->results_end += query->result_size; if (query->results_end >= query->buffer_size) @@ -1741,7 +1743,8 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, ctx->pm4[ctx->pm4_cdwords++] = results_base; ctx->pm4[ctx->pm4_cdwords++] = op; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, + RADEON_USAGE_READ); results_base += query->result_size; if (results_base >= query->buffer_size) results_base = 0; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index c5b82fd43ae..1e901897efd 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -95,11 +95,17 @@ void r600_context_reg(struct r600_context *ctx, void r600_init_cs(struct r600_context *ctx); int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base); -static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct 
r600_bo *rbo) +static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo, + enum radeon_bo_usage usage) { + enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? rbo->domains : 0; + enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? rbo->domains : 0; + + assert(usage); + unsigned reloc_index = ctx->radeon->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, - rbo->domains, rbo->domains); + rd, wd); if (reloc_index >= ctx->creloc) ctx->creloc = reloc_index+1; From a77431b3b0cda9da7dff628f65aaa804c7c96f57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 14 Aug 2011 21:21:38 +0200 Subject: [PATCH 341/600] r600g: consolidate two files r600d.h --- src/gallium/drivers/r600/r600d.h | 56 +- src/gallium/winsys/r600/drm/r600d.h | 2241 --------------------------- 2 files changed, 55 insertions(+), 2242 deletions(-) delete mode 100644 src/gallium/winsys/r600/drm/r600d.h diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index f6eec24cc05..de458cf398a 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -28,6 +28,32 @@ #define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 +/* evergreen values */ +#define EG_RESOURCE_OFFSET 0x00030000 +#define EG_RESOURCE_END 0x00034000 +#define EG_LOOP_CONST_OFFSET 0x0003A200 +#define EG_LOOP_CONST_END 0x0003A26C +#define EG_BOOL_CONST_OFFSET 0x0003A500 +#define EG_BOOL_CONST_END 0x0003A506 + +#define R600_CONFIG_REG_OFFSET 0X00008000 +#define R600_CONFIG_REG_END 0X0000AC00 +#define R600_CONTEXT_REG_OFFSET 0X00028000 +#define R600_CONTEXT_REG_END 0X00029000 +#define R600_ALU_CONST_OFFSET 0X00030000 +#define R600_ALU_CONST_END 0X00032000 +#define R600_RESOURCE_OFFSET 0X00038000 +#define R600_RESOURCE_END 0X0003C000 +#define R600_SAMPLER_OFFSET 0X0003C000 +#define R600_SAMPLER_END 0X0003CFF0 +#define R600_CTL_CONST_OFFSET 0X0003CFF0 +#define R600_CTL_CONST_END 0X0003E200 +#define R600_LOOP_CONST_OFFSET 0X0003E200 +#define 
R600_LOOP_CONST_END 0X0003E380 +#define R600_BOOL_CONST_OFFSET 0X0003E380 +#define R600_BOOL_CONST_END 0X00040000 + + #define PKT3_NOP 0x10 #define PKT3_INDIRECT_BUFFER_END 0x17 #define PKT3_SET_PREDICATION 0x20 @@ -66,11 +92,38 @@ #define PKT3_SET_SAMPLER 0x6E #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define SURFACE_BASE_UPDATE_DEPTH (1 << 0) +#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x)) +#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x)) + +#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 +#define EVENT_TYPE_ZPASS_DONE 0x15 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + * 5 - TS events + */ #define PREDICATION_OP_CLEAR 0x0 #define PREDICATION_OP_ZPASS 0x1 #define PREDICATION_OP_PRIMCOUNT 0x2 +#define PRED_OP(x) ((x) << 16) + +#define PREDICATION_CONTINUE (1 << 31) + +#define PREDICATION_HINT_WAIT (0 << 12) +#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) + +#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) +#define PREDICATION_DRAW_VISIBLE (1 << 8) + #define PKT_TYPE_S(x) (((x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) #define PKT_TYPE_C 0x3FFFFFFF @@ -83,8 +136,9 @@ #define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) #define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT3_PRED_S(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate)) /* Registers */ #define R_008C00_SQ_CONFIG 0x00008C00 diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h 
deleted file mode 100644 index 4a19dcf8ddf..00000000000 --- a/src/gallium/winsys/r600/drm/r600d.h +++ /dev/null @@ -1,2241 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Jerome Glisse - */ -#ifndef R600D_H -#define R600D_H - -/* evergreen values */ -#define EG_RESOURCE_OFFSET 0x00030000 -#define EG_RESOURCE_END 0x00034000 -#define EG_LOOP_CONST_OFFSET 0x0003A200 -#define EG_LOOP_CONST_END 0x0003A26C -#define EG_BOOL_CONST_OFFSET 0x0003A500 -#define EG_BOOL_CONST_END 0x0003A506 - - -#define R600_CONFIG_REG_OFFSET 0X00008000 -#define R600_CONFIG_REG_END 0X0000AC00 -#define R600_CONTEXT_REG_OFFSET 0X00028000 -#define R600_CONTEXT_REG_END 0X00029000 -#define R600_ALU_CONST_OFFSET 0X00030000 -#define R600_ALU_CONST_END 0X00032000 -#define R600_RESOURCE_OFFSET 0X00038000 -#define R600_RESOURCE_END 0X0003C000 -#define R600_SAMPLER_OFFSET 0X0003C000 -#define R600_SAMPLER_END 0X0003CFF0 -#define R600_CTL_CONST_OFFSET 0X0003CFF0 -#define R600_CTL_CONST_END 0X0003E200 -#define R600_LOOP_CONST_OFFSET 0X0003E200 -#define R600_LOOP_CONST_END 0X0003E380 -#define R600_BOOL_CONST_OFFSET 0X0003E380 -#define R600_BOOL_CONST_END 0X00040000 - -#define PKT3_NOP 0x10 -#define PKT3_INDIRECT_BUFFER_END 0x17 -#define PKT3_SET_PREDICATION 0x20 -#define PKT3_REG_RMW 0x21 -#define PKT3_COND_EXEC 0x22 -#define PKT3_PRED_EXEC 0x23 -#define PKT3_START_3D_CMDBUF 0x24 -#define PKT3_DRAW_INDEX_2 0x27 -#define PKT3_CONTEXT_CONTROL 0x28 -#define PKT3_DRAW_INDEX_IMMD_BE 0x29 -#define PKT3_INDEX_TYPE 0x2A -#define PKT3_DRAW_INDEX 0x2B -#define PKT3_DRAW_INDEX_AUTO 0x2D -#define PKT3_DRAW_INDEX_IMMD 0x2E -#define PKT3_NUM_INSTANCES 0x2F -#define PKT3_STRMOUT_BUFFER_UPDATE 0x34 -#define PKT3_INDIRECT_BUFFER_MP 0x38 -#define PKT3_MEM_SEMAPHORE 0x39 -#define PKT3_MPEG_INDEX 0x3A -#define PKT3_WAIT_REG_MEM 0x3C -#define PKT3_MEM_WRITE 0x3D -#define PKT3_INDIRECT_BUFFER 0x32 -#define PKT3_CP_INTERRUPT 0x40 -#define PKT3_SURFACE_SYNC 0x43 -#define PKT3_ME_INITIALIZE 0x44 -#define PKT3_COND_WRITE 0x45 -#define PKT3_EVENT_WRITE 0x46 -#define PKT3_EVENT_WRITE_EOP 0x47 -#define PKT3_ONE_REG_WRITE 0x57 -#define PKT3_SET_CONFIG_REG 0x68 -#define 
PKT3_SET_CONTEXT_REG 0x69 -#define PKT3_SET_ALU_CONST 0x6A -#define PKT3_SET_BOOL_CONST 0x6B -#define PKT3_SET_LOOP_CONST 0x6C -#define PKT3_SET_RESOURCE 0x6D -#define PKT3_SET_SAMPLER 0x6E -#define PKT3_SET_CTL_CONST 0x6F -#define PKT3_SURFACE_BASE_UPDATE 0x73 -#define SURFACE_BASE_UPDATE_DEPTH (1 << 0) -#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x)) -#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x)) - -#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 -#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 -#define EVENT_TYPE_ZPASS_DONE 0x15 -#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 -#define EVENT_TYPE(x) ((x) << 0) -#define EVENT_INDEX(x) ((x) << 8) - /* 0 - any non-TS event - * 1 - ZPASS_DONE - * 2 - SAMPLE_PIPELINESTAT - * 3 - SAMPLE_STREAMOUTSTAT* - * 4 - *S_PARTIAL_FLUSH - * 5 - TS events - */ - -#define PREDICATION_OP_CLEAR 0x0 -#define PREDICATION_OP_ZPASS 0x1 -#define PREDICATION_OP_PRIMCOUNT 0x2 - -#define PRED_OP(x) ((x) << 16) - -#define PREDICATION_CONTINUE (1 << 31) - -#define PREDICATION_HINT_WAIT (0 << 12) -#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) - -#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) -#define PREDICATION_DRAW_VISIBLE (1 << 8) - -#define PKT_TYPE_S(x) (((x) & 0x3) << 30) -#define PKT_TYPE_G(x) (((x) >> 30) & 0x3) -#define PKT_TYPE_C 0x3FFFFFFF -#define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) -#define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) -#define PKT_COUNT_C 0xC000FFFF -#define PKT0_BASE_INDEX_S(x) (((x) & 0xFFFF) << 0) -#define PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) -#define PKT0_BASE_INDEX_C 0xFFFF0000 -#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) -#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) -#define PKT3_IT_OPCODE_C 0xFFFF00FF -#define PKT3_PRED_S(x) (((x) >> 0) & 0x1) -#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate)) - -/* Registers */ -#define 
R_0280A0_CB_COLOR0_INFO 0x0280A0 -#define S_0280A0_ENDIAN(x) (((x) & 0x3) << 0) -#define G_0280A0_ENDIAN(x) (((x) >> 0) & 0x3) -#define C_0280A0_ENDIAN 0xFFFFFFFC -#define S_0280A0_FORMAT(x) (((x) & 0x3F) << 2) -#define G_0280A0_FORMAT(x) (((x) >> 2) & 0x3F) -#define C_0280A0_FORMAT 0xFFFFFF03 -#define V_0280A0_COLOR_INVALID 0x00000000 -#define V_0280A0_COLOR_8 0x00000001 -#define V_0280A0_COLOR_4_4 0x00000002 -#define V_0280A0_COLOR_3_3_2 0x00000003 -#define V_0280A0_COLOR_16 0x00000005 -#define V_0280A0_COLOR_16_FLOAT 0x00000006 -#define V_0280A0_COLOR_8_8 0x00000007 -#define V_0280A0_COLOR_5_6_5 0x00000008 -#define V_0280A0_COLOR_6_5_5 0x00000009 -#define V_0280A0_COLOR_1_5_5_5 0x0000000A -#define V_0280A0_COLOR_4_4_4_4 0x0000000B -#define V_0280A0_COLOR_5_5_5_1 0x0000000C -#define V_0280A0_COLOR_32 0x0000000D -#define V_0280A0_COLOR_32_FLOAT 0x0000000E -#define V_0280A0_COLOR_16_16 0x0000000F -#define V_0280A0_COLOR_16_16_FLOAT 0x00000010 -#define V_0280A0_COLOR_8_24 0x00000011 -#define V_0280A0_COLOR_8_24_FLOAT 0x00000012 -#define V_0280A0_COLOR_24_8 0x00000013 -#define V_0280A0_COLOR_24_8_FLOAT 0x00000014 -#define V_0280A0_COLOR_10_11_11 0x00000015 -#define V_0280A0_COLOR_10_11_11_FLOAT 0x00000016 -#define V_0280A0_COLOR_11_11_10 0x00000017 -#define V_0280A0_COLOR_11_11_10_FLOAT 0x00000018 -#define V_0280A0_COLOR_2_10_10_10 0x00000019 -#define V_0280A0_COLOR_8_8_8_8 0x0000001A -#define V_0280A0_COLOR_10_10_10_2 0x0000001B -#define V_0280A0_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_0280A0_COLOR_32_32 0x0000001D -#define V_0280A0_COLOR_32_32_FLOAT 0x0000001E -#define V_0280A0_COLOR_16_16_16_16 0x0000001F -#define V_0280A0_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_0280A0_COLOR_32_32_32_32 0x00000022 -#define V_0280A0_COLOR_32_32_32_32_FLOAT 0x00000023 -#define S_0280A0_ARRAY_MODE(x) (((x) & 0xF) << 8) -#define G_0280A0_ARRAY_MODE(x) (((x) >> 8) & 0xF) -#define C_0280A0_ARRAY_MODE 0xFFFFF0FF -#define V_0280A0_ARRAY_LINEAR_GENERAL 0x00000000 -#define 
V_0280A0_ARRAY_LINEAR_ALIGNED 0x00000001 -#define V_0280A0_ARRAY_1D_TILED_THIN1 0x00000002 -#define V_0280A0_ARRAY_2D_TILED_THIN1 0x00000004 -#define S_0280A0_NUMBER_TYPE(x) (((x) & 0x7) << 12) -#define G_0280A0_NUMBER_TYPE(x) (((x) >> 12) & 0x7) -#define C_0280A0_NUMBER_TYPE 0xFFFF8FFF -#define S_0280A0_READ_SIZE(x) (((x) & 0x1) << 15) -#define G_0280A0_READ_SIZE(x) (((x) >> 15) & 0x1) -#define C_0280A0_READ_SIZE 0xFFFF7FFF -#define S_0280A0_COMP_SWAP(x) (((x) & 0x3) << 16) -#define G_0280A0_COMP_SWAP(x) (((x) >> 16) & 0x3) -#define C_0280A0_COMP_SWAP 0xFFFCFFFF -#define S_0280A0_TILE_MODE(x) (((x) & 0x3) << 18) -#define G_0280A0_TILE_MODE(x) (((x) >> 18) & 0x3) -#define C_0280A0_TILE_MODE 0xFFF3FFFF -#define S_0280A0_BLEND_CLAMP(x) (((x) & 0x1) << 20) -#define G_0280A0_BLEND_CLAMP(x) (((x) >> 20) & 0x1) -#define C_0280A0_BLEND_CLAMP 0xFFEFFFFF -#define S_0280A0_CLEAR_COLOR(x) (((x) & 0x1) << 21) -#define G_0280A0_CLEAR_COLOR(x) (((x) >> 21) & 0x1) -#define C_0280A0_CLEAR_COLOR 0xFFDFFFFF -#define S_0280A0_BLEND_BYPASS(x) (((x) & 0x1) << 22) -#define G_0280A0_BLEND_BYPASS(x) (((x) >> 22) & 0x1) -#define C_0280A0_BLEND_BYPASS 0xFFBFFFFF -#define S_0280A0_BLEND_FLOAT32(x) (((x) & 0x1) << 23) -#define G_0280A0_BLEND_FLOAT32(x) (((x) >> 23) & 0x1) -#define C_0280A0_BLEND_FLOAT32 0xFF7FFFFF -#define S_0280A0_SIMPLE_FLOAT(x) (((x) & 0x1) << 24) -#define G_0280A0_SIMPLE_FLOAT(x) (((x) >> 24) & 0x1) -#define C_0280A0_SIMPLE_FLOAT 0xFEFFFFFF -#define S_0280A0_ROUND_MODE(x) (((x) & 0x1) << 25) -#define G_0280A0_ROUND_MODE(x) (((x) >> 25) & 0x1) -#define C_0280A0_ROUND_MODE 0xFDFFFFFF -#define S_0280A0_TILE_COMPACT(x) (((x) & 0x1) << 26) -#define G_0280A0_TILE_COMPACT(x) (((x) >> 26) & 0x1) -#define C_0280A0_TILE_COMPACT 0xFBFFFFFF -#define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27) -#define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1) -#define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF -#define R_028060_CB_COLOR0_SIZE 0x028060 -#define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) 
<< 0) -#define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028060_PITCH_TILE_MAX 0xFFFFFC00 -#define S_028060_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) -#define G_028060_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) -#define C_028060_SLICE_TILE_MAX 0xC00003FF -#define R_028800_DB_DEPTH_CONTROL 0x028800 -#define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0) -#define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028800_STENCIL_ENABLE 0xFFFFFFFE -#define S_028800_Z_ENABLE(x) (((x) & 0x1) << 1) -#define G_028800_Z_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028800_Z_ENABLE 0xFFFFFFFD -#define S_028800_Z_WRITE_ENABLE(x) (((x) & 0x1) << 2) -#define G_028800_Z_WRITE_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028800_Z_WRITE_ENABLE 0xFFFFFFFB -#define S_028800_ZFUNC(x) (((x) & 0x7) << 4) -#define G_028800_ZFUNC(x) (((x) >> 4) & 0x7) -#define C_028800_ZFUNC 0xFFFFFF8F -#define S_028800_BACKFACE_ENABLE(x) (((x) & 0x1) << 7) -#define G_028800_BACKFACE_ENABLE(x) (((x) >> 7) & 0x1) -#define C_028800_BACKFACE_ENABLE 0xFFFFFF7F -#define S_028800_STENCILFUNC(x) (((x) & 0x7) << 8) -#define G_028800_STENCILFUNC(x) (((x) >> 8) & 0x7) -#define C_028800_STENCILFUNC 0xFFFFF8FF -#define S_028800_STENCILFAIL(x) (((x) & 0x7) << 11) -#define G_028800_STENCILFAIL(x) (((x) >> 11) & 0x7) -#define C_028800_STENCILFAIL 0xFFFFC7FF -#define S_028800_STENCILZPASS(x) (((x) & 0x7) << 14) -#define G_028800_STENCILZPASS(x) (((x) >> 14) & 0x7) -#define C_028800_STENCILZPASS 0xFFFE3FFF -#define S_028800_STENCILZFAIL(x) (((x) & 0x7) << 17) -#define G_028800_STENCILZFAIL(x) (((x) >> 17) & 0x7) -#define C_028800_STENCILZFAIL 0xFFF1FFFF -#define S_028800_STENCILFUNC_BF(x) (((x) & 0x7) << 20) -#define G_028800_STENCILFUNC_BF(x) (((x) >> 20) & 0x7) -#define C_028800_STENCILFUNC_BF 0xFF8FFFFF -#define S_028800_STENCILFAIL_BF(x) (((x) & 0x7) << 23) -#define G_028800_STENCILFAIL_BF(x) (((x) >> 23) & 0x7) -#define C_028800_STENCILFAIL_BF 0xFC7FFFFF -#define S_028800_STENCILZPASS_BF(x) (((x) & 0x7) << 
26) -#define G_028800_STENCILZPASS_BF(x) (((x) >> 26) & 0x7) -#define C_028800_STENCILZPASS_BF 0xE3FFFFFF -#define S_028800_STENCILZFAIL_BF(x) (((x) & 0x7) << 29) -#define G_028800_STENCILZFAIL_BF(x) (((x) >> 29) & 0x7) -#define C_028800_STENCILZFAIL_BF 0x1FFFFFFF -#define R_028010_DB_DEPTH_INFO 0x028010 -#define S_028010_FORMAT(x) (((x) & 0x7) << 0) -#define G_028010_FORMAT(x) (((x) >> 0) & 0x7) -#define C_028010_FORMAT 0xFFFFFFF8 -#define V_028010_DEPTH_INVALID 0x00000000 -#define V_028010_DEPTH_16 0x00000001 -#define V_028010_DEPTH_X8_24 0x00000002 -#define V_028010_DEPTH_8_24 0x00000003 -#define V_028010_DEPTH_X8_24_FLOAT 0x00000004 -#define V_028010_DEPTH_8_24_FLOAT 0x00000005 -#define V_028010_DEPTH_32_FLOAT 0x00000006 -#define V_028010_DEPTH_X24_8_32_FLOAT 0x00000007 -#define S_028010_READ_SIZE(x) (((x) & 0x1) << 3) -#define G_028010_READ_SIZE(x) (((x) >> 3) & 0x1) -#define C_028010_READ_SIZE 0xFFFFFFF7 -#define S_028010_ARRAY_MODE(x) (((x) & 0xF) << 15) -#define G_028010_ARRAY_MODE(x) (((x) >> 15) & 0xF) -#define C_028010_ARRAY_MODE 0xFFF87FFF -#define S_028010_TILE_SURFACE_ENABLE(x) (((x) & 0x1) << 25) -#define G_028010_TILE_SURFACE_ENABLE(x) (((x) >> 25) & 0x1) -#define C_028010_TILE_SURFACE_ENABLE 0xFDFFFFFF -#define S_028010_TILE_COMPACT(x) (((x) & 0x1) << 26) -#define G_028010_TILE_COMPACT(x) (((x) >> 26) & 0x1) -#define C_028010_TILE_COMPACT 0xFBFFFFFF -#define S_028010_ZRANGE_PRECISION(x) (((x) & 0x1) << 31) -#define G_028010_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1) -#define C_028010_ZRANGE_PRECISION 0x7FFFFFFF -#define R_028000_DB_DEPTH_SIZE 0x028000 -#define S_028000_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028000_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028000_PITCH_TILE_MAX 0xFFFFFC00 -#define S_028000_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) -#define G_028000_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) -#define C_028000_SLICE_TILE_MAX 0xC00003FF -#define R_028004_DB_DEPTH_VIEW 0x028004 -#define S_028004_SLICE_START(x) (((x) & 
0x7FF) << 0) -#define G_028004_SLICE_START(x) (((x) >> 0) & 0x7FF) -#define C_028004_SLICE_START 0xFFFFF800 -#define S_028004_SLICE_MAX(x) (((x) & 0x7FF) << 13) -#define G_028004_SLICE_MAX(x) (((x) >> 13) & 0x7FF) -#define C_028004_SLICE_MAX 0xFF001FFF -#define R_028D24_DB_HTILE_SURFACE 0x028D24 -#define S_028D24_HTILE_WIDTH(x) (((x) & 0x1) << 0) -#define G_028D24_HTILE_WIDTH(x) (((x) >> 0) & 0x1) -#define C_028D24_HTILE_WIDTH 0xFFFFFFFE -#define S_028D24_HTILE_HEIGHT(x) (((x) & 0x1) << 1) -#define G_028D24_HTILE_HEIGHT(x) (((x) >> 1) & 0x1) -#define C_028D24_HTILE_HEIGHT 0xFFFFFFFD -#define S_028D24_LINEAR(x) (((x) & 0x1) << 2) -#define G_028D24_LINEAR(x) (((x) >> 2) & 0x1) -#define C_028D24_LINEAR 0xFFFFFFFB -#define S_028D24_FULL_CACHE(x) (((x) & 0x1) << 3) -#define G_028D24_FULL_CACHE(x) (((x) >> 3) & 0x1) -#define C_028D24_FULL_CACHE 0xFFFFFFF7 -#define S_028D24_HTILE_USES_PRELOAD_WIN(x) (((x) & 0x1) << 4) -#define G_028D24_HTILE_USES_PRELOAD_WIN(x) (((x) >> 4) & 0x1) -#define C_028D24_HTILE_USES_PRELOAD_WIN 0xFFFFFFEF -#define S_028D24_PRELOAD(x) (((x) & 0x1) << 5) -#define G_028D24_PRELOAD(x) (((x) >> 5) & 0x1) -#define C_028D24_PRELOAD 0xFFFFFFDF -#define S_028D24_PREFETCH_WIDTH(x) (((x) & 0x3F) << 6) -#define G_028D24_PREFETCH_WIDTH(x) (((x) >> 6) & 0x3F) -#define C_028D24_PREFETCH_WIDTH 0xFFFFF03F -#define S_028D24_PREFETCH_HEIGHT(x) (((x) & 0x3F) << 12) -#define G_028D24_PREFETCH_HEIGHT(x) (((x) >> 12) & 0x3F) -#define C_028D24_PREFETCH_HEIGHT 0xFFFC0FFF -#define R_028D34_DB_PREFETCH_LIMIT 0x028D34 -#define S_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028D34_DEPTH_HEIGHT_TILE_MAX 0xFFFFFC00 -#define R_028D10_DB_RENDER_OVERRIDE 0x028D10 -#define S_028D10_FORCE_HIZ_ENABLE(x) (((x) & 0x3) << 0) -#define G_028D10_FORCE_HIZ_ENABLE(x) (((x) >> 0) & 0x3) -#define C_028D10_FORCE_HIZ_ENABLE 0xFFFFFFFC -#define S_028D10_FORCE_HIS_ENABLE0(x) (((x) & 0x3) << 2) -#define 
G_028D10_FORCE_HIS_ENABLE0(x) (((x) >> 2) & 0x3) -#define C_028D10_FORCE_HIS_ENABLE0 0xFFFFFFF3 -#define S_028D10_FORCE_HIS_ENABLE1(x) (((x) & 0x3) << 4) -#define G_028D10_FORCE_HIS_ENABLE1(x) (((x) >> 4) & 0x3) -#define C_028D10_FORCE_HIS_ENABLE1 0xFFFFFFCF -#define S_028D10_FORCE_SHADER_Z_ORDER(x) (((x) & 0x1) << 6) -#define G_028D10_FORCE_SHADER_Z_ORDER(x) (((x) >> 6) & 0x1) -#define C_028D10_FORCE_SHADER_Z_ORDER 0xFFFFFFBF -#define S_028D10_FAST_Z_DISABLE(x) (((x) & 0x1) << 7) -#define G_028D10_FAST_Z_DISABLE(x) (((x) >> 7) & 0x1) -#define C_028D10_FAST_Z_DISABLE 0xFFFFFF7F -#define S_028D10_FAST_STENCIL_DISABLE(x) (((x) & 0x1) << 8) -#define G_028D10_FAST_STENCIL_DISABLE(x) (((x) >> 8) & 0x1) -#define C_028D10_FAST_STENCIL_DISABLE 0xFFFFFEFF -#define S_028D10_NOOP_CULL_DISABLE(x) (((x) & 0x1) << 9) -#define G_028D10_NOOP_CULL_DISABLE(x) (((x) >> 9) & 0x1) -#define C_028D10_NOOP_CULL_DISABLE 0xFFFFFDFF -#define S_028D10_FORCE_COLOR_KILL(x) (((x) & 0x1) << 10) -#define G_028D10_FORCE_COLOR_KILL(x) (((x) >> 10) & 0x1) -#define C_028D10_FORCE_COLOR_KILL 0xFFFFFBFF -#define S_028D10_FORCE_Z_READ(x) (((x) & 0x1) << 11) -#define G_028D10_FORCE_Z_READ(x) (((x) >> 11) & 0x1) -#define C_028D10_FORCE_Z_READ 0xFFFFF7FF -#define S_028D10_FORCE_STENCIL_READ(x) (((x) & 0x1) << 12) -#define G_028D10_FORCE_STENCIL_READ(x) (((x) >> 12) & 0x1) -#define C_028D10_FORCE_STENCIL_READ 0xFFFFEFFF -#define S_028D10_FORCE_FULL_Z_RANGE(x) (((x) & 0x3) << 13) -#define G_028D10_FORCE_FULL_Z_RANGE(x) (((x) >> 13) & 0x3) -#define C_028D10_FORCE_FULL_Z_RANGE 0xFFFF9FFF -#define S_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) & 0x1) << 15) -#define G_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) >> 15) & 0x1) -#define C_028D10_FORCE_QC_SMASK_CONFLICT 0xFFFF7FFF -#define S_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) & 0x1) << 16) -#define G_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) >> 16) & 0x1) -#define C_028D10_DISABLE_VIEWPORT_CLAMP 0xFFFEFFFF -#define S_028D10_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) -#define 
G_028D10_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) -#define C_028D10_IGNORE_SC_ZRANGE 0xFFFDFFFF -#define R_028A40_VGT_GS_MODE 0x028A40 -#define S_028A40_MODE(x) (((x) & 0x3) << 0) -#define G_028A40_MODE(x) (((x) >> 0) & 0x3) -#define C_028A40_MODE 0xFFFFFFFC -#define S_028A40_ES_PASSTHRU(x) (((x) & 0x1) << 2) -#define G_028A40_ES_PASSTHRU(x) (((x) >> 2) & 0x1) -#define C_028A40_ES_PASSTHRU 0xFFFFFFFB -#define S_028A40_CUT_MODE(x) (((x) & 0x3) << 3) -#define G_028A40_CUT_MODE(x) (((x) >> 3) & 0x3) -#define C_028A40_CUT_MODE 0xFFFFFFE7 -#define R_008DFC_SQ_CF_WORD0 0x008DFC -#define S_008DFC_ADDR(x) (((x) & 0xFFFFFFFF) << 0) -#define G_008DFC_ADDR(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_008DFC_ADDR 0x00000000 -#define R_008DFC_SQ_CF_WORD1 0x008DFC -#define S_008DFC_POP_COUNT(x) (((x) & 0x7) << 0) -#define G_008DFC_POP_COUNT(x) (((x) >> 0) & 0x7) -#define C_008DFC_POP_COUNT 0xFFFFFFF8 -#define S_008DFC_CF_CONST(x) (((x) & 0x1F) << 3) -#define G_008DFC_CF_CONST(x) (((x) >> 3) & 0x1F) -#define C_008DFC_CF_CONST 0xFFFFFF07 -#define S_008DFC_COND(x) (((x) & 0x3) << 8) -#define G_008DFC_COND(x) (((x) >> 8) & 0x3) -#define C_008DFC_COND 0xFFFFFCFF -#define S_008DFC_COUNT(x) (((x) & 0x7) << 10) -#define G_008DFC_COUNT(x) (((x) >> 10) & 0x7) -#define C_008DFC_COUNT 0xFFFFE3FF -#define S_008DFC_CALL_COUNT(x) (((x) & 0x3F) << 13) -#define G_008DFC_CALL_COUNT(x) (((x) >> 13) & 0x3F) -#define C_008DFC_CALL_COUNT 0xFFF81FFF -#define S_008DFC_END_OF_PROGRAM(x) (((x) & 0x1) << 21) -#define G_008DFC_END_OF_PROGRAM(x) (((x) >> 21) & 0x1) -#define C_008DFC_END_OF_PROGRAM 0xFFDFFFFF -#define S_008DFC_VALID_PIXEL_MODE(x) (((x) & 0x1) << 22) -#define G_008DFC_VALID_PIXEL_MODE(x) (((x) >> 22) & 0x1) -#define C_008DFC_VALID_PIXEL_MODE 0xFFBFFFFF -#define S_008DFC_CF_INST(x) (((x) & 0x7F) << 23) -#define G_008DFC_CF_INST(x) (((x) >> 23) & 0x7F) -#define C_008DFC_CF_INST 0xC07FFFFF -#define V_008DFC_SQ_CF_INST_NOP 0x00000000 -#define V_008DFC_SQ_CF_INST_TEX 0x00000001 -#define 
V_008DFC_SQ_CF_INST_VTX 0x00000002 -#define V_008DFC_SQ_CF_INST_VTX_TC 0x00000003 -#define V_008DFC_SQ_CF_INST_LOOP_START 0x00000004 -#define V_008DFC_SQ_CF_INST_LOOP_END 0x00000005 -#define V_008DFC_SQ_CF_INST_LOOP_START_DX10 0x00000006 -#define V_008DFC_SQ_CF_INST_LOOP_START_NO_AL 0x00000007 -#define V_008DFC_SQ_CF_INST_LOOP_CONTINUE 0x00000008 -#define V_008DFC_SQ_CF_INST_LOOP_BREAK 0x00000009 -#define V_008DFC_SQ_CF_INST_JUMP 0x0000000A -#define V_008DFC_SQ_CF_INST_PUSH 0x0000000B -#define V_008DFC_SQ_CF_INST_PUSH_ELSE 0x0000000C -#define V_008DFC_SQ_CF_INST_ELSE 0x0000000D -#define V_008DFC_SQ_CF_INST_POP 0x0000000E -#define V_008DFC_SQ_CF_INST_POP_JUMP 0x0000000F -#define V_008DFC_SQ_CF_INST_POP_PUSH 0x00000010 -#define V_008DFC_SQ_CF_INST_POP_PUSH_ELSE 0x00000011 -#define V_008DFC_SQ_CF_INST_CALL 0x00000012 -#define V_008DFC_SQ_CF_INST_CALL_FS 0x00000013 -#define V_008DFC_SQ_CF_INST_RETURN 0x00000014 -#define V_008DFC_SQ_CF_INST_EMIT_VERTEX 0x00000015 -#define V_008DFC_SQ_CF_INST_EMIT_CUT_VERTEX 0x00000016 -#define V_008DFC_SQ_CF_INST_CUT_VERTEX 0x00000017 -#define V_008DFC_SQ_CF_INST_KILL 0x00000018 -#define S_008DFC_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) -#define G_008DFC_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) -#define C_008DFC_WHOLE_QUAD_MODE 0xBFFFFFFF -#define S_008DFC_BARRIER(x) (((x) & 0x1) << 31) -#define G_008DFC_BARRIER(x) (((x) >> 31) & 0x1) -#define C_008DFC_BARRIER 0x7FFFFFFF -#define R_008DFC_SQ_CF_ALU_WORD0 0x008DFC -#define S_008DFC_ALU_ADDR(x) (((x) & 0x3FFFFF) << 0) -#define G_008DFC_ALU_ADDR(x) (((x) >> 0) & 0x3FFFFF) -#define C_008DFC_ALU_ADDR 0xFFC00000 -#define S_008DFC_KCACHE_BANK0(x) (((x) & 0xF) << 22) -#define G_008DFC_KCACHE_BANK0(x) (((x) >> 22) & 0xF) -#define C_008DFC_KCACHE_BANK0 0xFC3FFFFF -#define S_008DFC_KCACHE_BANK1(x) (((x) & 0xF) << 26) -#define G_008DFC_KCACHE_BANK1(x) (((x) >> 26) & 0xF) -#define C_008DFC_KCACHE_BANK1 0xC3FFFFFF -#define S_008DFC_KCACHE_MODE0(x) (((x) & 0x3) << 30) -#define G_008DFC_KCACHE_MODE0(x) 
(((x) >> 30) & 0x3) -#define C_008DFC_KCACHE_MODE0 0x3FFFFFFF -#define R_008DFC_SQ_CF_ALU_WORD1 0x008DFC -#define S_008DFC_KCACHE_MODE1(x) (((x) & 0x3) << 0) -#define G_008DFC_KCACHE_MODE1(x) (((x) >> 0) & 0x3) -#define C_008DFC_KCACHE_MODE1 0xFFFFFFFC -#define S_008DFC_KCACHE_ADDR0(x) (((x) & 0xFF) << 2) -#define G_008DFC_KCACHE_ADDR0(x) (((x) >> 2) & 0xFF) -#define C_008DFC_KCACHE_ADDR0 0xFFFFFC03 -#define S_008DFC_KCACHE_ADDR1(x) (((x) & 0xFF) << 10) -#define G_008DFC_KCACHE_ADDR1(x) (((x) >> 10) & 0xFF) -#define C_008DFC_KCACHE_ADDR1 0xFFFC03FF -#define S_008DFC_ALU_COUNT(x) (((x) & 0x7F) << 18) -#define G_008DFC_ALU_COUNT(x) (((x) >> 18) & 0x7F) -#define C_008DFC_ALU_COUNT 0xFE03FFFF -#define S_008DFC_USES_WATERFALL(x) (((x) & 0x1) << 25) -#define G_008DFC_USES_WATERFALL(x) (((x) >> 25) & 0x1) -#define C_008DFC_USES_WATERFALL 0xFDFFFFFF -#define S_008DFC_CF_ALU_INST(x) (((x) & 0xF) << 26) -#define G_008DFC_CF_ALU_INST(x) (((x) >> 26) & 0xF) -#define C_008DFC_CF_ALU_INST 0xC3FFFFFF -#define V_008DFC_SQ_CF_INST_ALU 0x00000008 -#define V_008DFC_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009 -#define V_008DFC_SQ_CF_INST_ALU_POP_AFTER 0x0000000A -#define V_008DFC_SQ_CF_INST_ALU_POP2_AFTER 0x0000000B -#define V_008DFC_SQ_CF_INST_ALU_CONTINUE 0x0000000D -#define V_008DFC_SQ_CF_INST_ALU_BREAK 0x0000000E -#define V_008DFC_SQ_CF_INST_ALU_ELSE_AFTER 0x0000000F -#define S_008DFC_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) -#define G_008DFC_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) -#define C_008DFC_WHOLE_QUAD_MODE 0xBFFFFFFF -#define S_008DFC_BARRIER(x) (((x) & 0x1) << 31) -#define G_008DFC_BARRIER(x) (((x) >> 31) & 0x1) -#define C_008DFC_BARRIER 0x7FFFFFFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD0 0x008DFC -#define S_008DFC_ARRAY_BASE(x) (((x) & 0x1FFF) << 0) -#define G_008DFC_ARRAY_BASE(x) (((x) >> 0) & 0x1FFF) -#define C_008DFC_ARRAY_BASE 0xFFFFE000 -#define S_008DFC_TYPE(x) (((x) & 0x3) << 13) -#define G_008DFC_TYPE(x) (((x) >> 13) & 0x3) -#define C_008DFC_TYPE 0xFFFF9FFF -#define 
S_008DFC_RW_GPR(x) (((x) & 0x7F) << 15) -#define G_008DFC_RW_GPR(x) (((x) >> 15) & 0x7F) -#define C_008DFC_RW_GPR 0xFFC07FFF -#define S_008DFC_RW_REL(x) (((x) & 0x1) << 22) -#define G_008DFC_RW_REL(x) (((x) >> 22) & 0x1) -#define C_008DFC_RW_REL 0xFFBFFFFF -#define S_008DFC_INDEX_GPR(x) (((x) & 0x7F) << 23) -#define G_008DFC_INDEX_GPR(x) (((x) >> 23) & 0x7F) -#define C_008DFC_INDEX_GPR 0xC07FFFFF -#define S_008DFC_ELEM_SIZE(x) (((x) & 0x3) << 30) -#define G_008DFC_ELEM_SIZE(x) (((x) >> 30) & 0x3) -#define C_008DFC_ELEM_SIZE 0x3FFFFFFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1 0x008DFC -#define S_008DFC_BURST_COUNT(x) (((x) & 0xF) << 17) -#define G_008DFC_BURST_COUNT(x) (((x) >> 17) & 0xF) -#define C_008DFC_BURST_COUNT 0xFFE1FFFF -#define S_008DFC_END_OF_PROGRAM(x) (((x) & 0x1) << 21) -#define G_008DFC_END_OF_PROGRAM(x) (((x) >> 21) & 0x1) -#define C_008DFC_END_OF_PROGRAM 0xFFDFFFFF -#define S_008DFC_VALID_PIXEL_MODE(x) (((x) & 0x1) << 22) -#define G_008DFC_VALID_PIXEL_MODE(x) (((x) >> 22) & 0x1) -#define C_008DFC_VALID_PIXEL_MODE 0xFFBFFFFF -#define S_008DFC_CF_INST(x) (((x) & 0x7F) << 23) -#define G_008DFC_CF_INST(x) (((x) >> 23) & 0x7F) -#define C_008DFC_CF_INST 0xC07FFFFF -#define V_008DFC_SQ_CF_INST_MEM_STREAM0 0x00000020 -#define V_008DFC_SQ_CF_INST_MEM_STREAM1 0x00000021 -#define V_008DFC_SQ_CF_INST_MEM_STREAM2 0x00000022 -#define V_008DFC_SQ_CF_INST_MEM_STREAM3 0x00000023 -#define V_008DFC_SQ_CF_INST_MEM_SCRATCH 0x00000024 -#define V_008DFC_SQ_CF_INST_MEM_REDUCTION 0x00000025 -#define V_008DFC_SQ_CF_INST_MEM_RING 0x00000026 -#define V_008DFC_SQ_CF_INST_EXPORT 0x00000027 -#define V_008DFC_SQ_CF_INST_EXPORT_DONE 0x00000028 -#define S_008DFC_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) -#define G_008DFC_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) -#define C_008DFC_WHOLE_QUAD_MODE 0xBFFFFFFF -#define S_008DFC_BARRIER(x) (((x) & 0x1) << 31) -#define G_008DFC_BARRIER(x) (((x) >> 31) & 0x1) -#define C_008DFC_BARRIER 0x7FFFFFFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1_BUF 
0x008DFC -#define S_008DFC_ARRAY_SIZE(x) (((x) & 0xFFF) << 0) -#define G_008DFC_ARRAY_SIZE(x) (((x) >> 0) & 0xFFF) -#define C_008DFC_ARRAY_SIZE 0xFFFFF000 -#define S_008DFC_COMP_MASK(x) (((x) & 0xF) << 12) -#define G_008DFC_COMP_MASK(x) (((x) >> 12) & 0xF) -#define C_008DFC_COMP_MASK 0xFFFF0FFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ 0x008DFC -#define S_008DFC_SEL_X(x) (((x) & 0x7) << 0) -#define G_008DFC_SEL_X(x) (((x) >> 0) & 0x7) -#define C_008DFC_SEL_X 0xFFFFFFF8 -#define S_008DFC_SEL_Y(x) (((x) & 0x7) << 3) -#define G_008DFC_SEL_Y(x) (((x) >> 3) & 0x7) -#define C_008DFC_SEL_Y 0xFFFFFFC7 -#define S_008DFC_SEL_Z(x) (((x) & 0x7) << 6) -#define G_008DFC_SEL_Z(x) (((x) >> 6) & 0x7) -#define C_008DFC_SEL_Z 0xFFFFFE3F -#define S_008DFC_SEL_W(x) (((x) & 0x7) << 9) -#define G_008DFC_SEL_W(x) (((x) >> 9) & 0x7) -#define C_008DFC_SEL_W 0xFFFFF1FF -#define R_008DFC_SQ_VTX_WORD0 0x008DFC -#define S_008DFC_VTX_INST(x) (((x) & 0x1F) << 0) -#define G_008DFC_VTX_INST(x) (((x) >> 0) & 0x1F) -#define C_008DFC_VTX_INST 0xFFFFFFE0 -#define S_008DFC_FETCH_TYPE(x) (((x) & 0x3) << 5) -#define G_008DFC_FETCH_TYPE(x) (((x) >> 5) & 0x3) -#define C_008DFC_FETCH_TYPE 0xFFFFFF9F -#define S_008DFC_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) -#define G_008DFC_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) -#define C_008DFC_FETCH_WHOLE_QUAD 0xFFFFFF7F -#define S_008DFC_BUFFER_ID(x) (((x) & 0xFF) << 8) -#define G_008DFC_BUFFER_ID(x) (((x) >> 8) & 0xFF) -#define C_008DFC_BUFFER_ID 0xFFFF00FF -#define S_008DFC_SRC_GPR(x) (((x) & 0x7F) << 16) -#define G_008DFC_SRC_GPR(x) (((x) >> 16) & 0x7F) -#define C_008DFC_SRC_GPR 0xFF80FFFF -#define S_008DFC_SRC_REL(x) (((x) & 0x1) << 23) -#define G_008DFC_SRC_REL(x) (((x) >> 23) & 0x1) -#define C_008DFC_SRC_REL 0xFF7FFFFF -#define S_008DFC_SRC_SEL_X(x) (((x) & 0x3) << 24) -#define G_008DFC_SRC_SEL_X(x) (((x) >> 24) & 0x3) -#define C_008DFC_SRC_SEL_X 0xFCFFFFFF -#define S_008DFC_MEGA_FETCH_COUNT(x) (((x) & 0x3F) << 26) -#define G_008DFC_MEGA_FETCH_COUNT(x) (((x) 
>> 26) & 0x3F) -#define C_008DFC_MEGA_FETCH_COUNT 0x03FFFFFF -#define R_008DFC_SQ_VTX_WORD1 0x008DFC -#define S_008DFC_DST_SEL_X(x) (((x) & 0x7) << 9) -#define G_008DFC_DST_SEL_X(x) (((x) >> 9) & 0x7) -#define C_008DFC_DST_SEL_X 0xFFFFF1FF -#define S_008DFC_DST_SEL_Y(x) (((x) & 0x7) << 12) -#define G_008DFC_DST_SEL_Y(x) (((x) >> 12) & 0x7) -#define C_008DFC_DST_SEL_Y 0xFFFF8FFF -#define S_008DFC_DST_SEL_Z(x) (((x) & 0x7) << 15) -#define G_008DFC_DST_SEL_Z(x) (((x) >> 15) & 0x7) -#define C_008DFC_DST_SEL_Z 0xFFFC7FFF -#define S_008DFC_DST_SEL_W(x) (((x) & 0x7) << 18) -#define G_008DFC_DST_SEL_W(x) (((x) >> 18) & 0x7) -#define C_008DFC_DST_SEL_W 0xFFE3FFFF -#define S_008DFC_USE_CONST_FIELDS(x) (((x) & 0x1) << 21) -#define G_008DFC_USE_CONST_FIELDS(x) (((x) >> 21) & 0x1) -#define C_008DFC_USE_CONST_FIELDS 0xFFDFFFFF -#define S_008DFC_DATA_FORMAT(x) (((x) & 0x3F) << 22) -#define G_008DFC_DATA_FORMAT(x) (((x) >> 22) & 0x3F) -#define C_008DFC_DATA_FORMAT 0xF03FFFFF -#define S_008DFC_NUM_FORMAT_ALL(x) (((x) & 0x3) << 28) -#define G_008DFC_NUM_FORMAT_ALL(x) (((x) >> 28) & 0x3) -#define C_008DFC_NUM_FORMAT_ALL 0xCFFFFFFF -#define S_008DFC_FORMAT_COMP_ALL(x) (((x) & 0x1) << 30) -#define G_008DFC_FORMAT_COMP_ALL(x) (((x) >> 30) & 0x1) -#define C_008DFC_FORMAT_COMP_ALL 0xBFFFFFFF -#define S_008DFC_SRF_MODE_ALL(x) (((x) & 0x1) << 31) -#define G_008DFC_SRF_MODE_ALL(x) (((x) >> 31) & 0x1) -#define C_008DFC_SRF_MODE_ALL 0x7FFFFFFF -#define R_008DFC_SQ_VTX_WORD1_GPR 0x008DFC -#define S_008DFC_DST_GPR(x) (((x) & 0x7F) << 0) -#define G_008DFC_DST_GPR(x) (((x) >> 0) & 0x7F) -#define C_008DFC_DST_GPR 0xFFFFFF80 -#define S_008DFC_DST_REL(x) (((x) & 0x1) << 7) -#define G_008DFC_DST_REL(x) (((x) >> 7) & 0x1) -#define C_008DFC_DST_REL 0xFFFFFF7F -#define R_008DFC_SQ_VTX_WORD2 0x008DFC -#define S_008DFC_OFFSET(x) (((x) & 0xFFFF) << 0) -#define G_008DFC_OFFSET(x) (((x) >> 0) & 0xFFFF) -#define C_008DFC_OFFSET 0xFFFF0000 -#define S_008DFC_ENDIAN_SWAP(x) (((x) & 0x3) << 16) -#define 
G_008DFC_ENDIAN_SWAP(x) (((x) >> 16) & 0x3) -#define C_008DFC_ENDIAN_SWAP 0xFFFCFFFF -#define S_008DFC_CONST_BUF_NO_STRIDE(x) (((x) & 0x1) << 18) -#define G_008DFC_CONST_BUF_NO_STRIDE(x) (((x) >> 18) & 0x1) -#define C_008DFC_CONST_BUF_NO_STRIDE 0xFFFBFFFF -#define S_008DFC_MEGA_FETCH(x) (((x) & 0x1) << 19) -#define G_008DFC_MEGA_FETCH(x) (((x) >> 19) & 0x1) -#define C_008DFC_MEGA_FETCH 0xFFF7FFFF -#define S_008DFC_ALT_CONST(x) (((x) & 0x1) << 20) -#define G_008DFC_ALT_CONST(x) (((x) >> 20) & 0x1) -#define C_008DFC_ALT_CONST 0xFFEFFFFF -#define R_008040_WAIT_UNTIL 0x008040 -#define S_008040_WAIT_CP_DMA_IDLE(x) (((x) & 0x1) << 8) -#define G_008040_WAIT_CP_DMA_IDLE(x) (((x) >> 8) & 0x1) -#define C_008040_WAIT_CP_DMA_IDLE 0xFFFFFEFF -#define S_008040_WAIT_CMDFIFO(x) (((x) & 0x1) << 10) -#define G_008040_WAIT_CMDFIFO(x) (((x) >> 10) & 0x1) -#define C_008040_WAIT_CMDFIFO 0xFFFFFBFF -#define S_008040_WAIT_2D_IDLE(x) (((x) & 0x1) << 14) -#define G_008040_WAIT_2D_IDLE(x) (((x) >> 14) & 0x1) -#define C_008040_WAIT_2D_IDLE 0xFFFFBFFF -#define S_008040_WAIT_3D_IDLE(x) (((x) & 0x1) << 15) -#define G_008040_WAIT_3D_IDLE(x) (((x) >> 15) & 0x1) -#define C_008040_WAIT_3D_IDLE 0xFFFF7FFF -#define S_008040_WAIT_2D_IDLECLEAN(x) (((x) & 0x1) << 16) -#define G_008040_WAIT_2D_IDLECLEAN(x) (((x) >> 16) & 0x1) -#define C_008040_WAIT_2D_IDLECLEAN 0xFFFEFFFF -#define S_008040_WAIT_3D_IDLECLEAN(x) (((x) & 0x1) << 17) -#define G_008040_WAIT_3D_IDLECLEAN(x) (((x) >> 17) & 0x1) -#define C_008040_WAIT_3D_IDLECLEAN 0xFFFDFFFF -#define S_008040_WAIT_EXTERN_SIG(x) (((x) & 0x1) << 19) -#define G_008040_WAIT_EXTERN_SIG(x) (((x) >> 19) & 0x1) -#define C_008040_WAIT_EXTERN_SIG 0xFFF7FFFF -#define S_008040_CMDFIFO_ENTRIES(x) (((x) & 0x1F) << 20) -#define G_008040_CMDFIFO_ENTRIES(x) (((x) >> 20) & 0x1F) -#define C_008040_CMDFIFO_ENTRIES 0xFE0FFFFF -#define R_0286CC_SPI_PS_IN_CONTROL_0 0x0286CC -#define S_0286CC_NUM_INTERP(x) (((x) & 0x3F) << 0) -#define G_0286CC_NUM_INTERP(x) (((x) >> 0) & 0x3F) -#define 
C_0286CC_NUM_INTERP 0xFFFFFFC0 -#define S_0286CC_POSITION_ENA(x) (((x) & 0x1) << 8) -#define G_0286CC_POSITION_ENA(x) (((x) >> 8) & 0x1) -#define C_0286CC_POSITION_ENA 0xFFFFFEFF -#define S_0286CC_POSITION_CENTROID(x) (((x) & 0x1) << 9) -#define G_0286CC_POSITION_CENTROID(x) (((x) >> 9) & 0x1) -#define C_0286CC_POSITION_CENTROID 0xFFFFFDFF -#define S_0286CC_POSITION_ADDR(x) (((x) & 0x1F) << 10) -#define G_0286CC_POSITION_ADDR(x) (((x) >> 10) & 0x1F) -#define C_0286CC_POSITION_ADDR 0xFFFF83FF -#define S_0286CC_PARAM_GEN(x) (((x) & 0xF) << 15) -#define G_0286CC_PARAM_GEN(x) (((x) >> 15) & 0xF) -#define C_0286CC_PARAM_GEN 0xFFF87FFF -#define S_0286CC_PARAM_GEN_ADDR(x) (((x) & 0x7F) << 19) -#define G_0286CC_PARAM_GEN_ADDR(x) (((x) >> 19) & 0x7F) -#define C_0286CC_PARAM_GEN_ADDR 0xFC07FFFF -#define S_0286CC_BARYC_SAMPLE_CNTL(x) (((x) & 0x3) << 26) -#define G_0286CC_BARYC_SAMPLE_CNTL(x) (((x) >> 26) & 0x3) -#define C_0286CC_BARYC_SAMPLE_CNTL 0xF3FFFFFF -#define S_0286CC_PERSP_GRADIENT_ENA(x) (((x) & 0x1) << 28) -#define G_0286CC_PERSP_GRADIENT_ENA(x) (((x) >> 28) & 0x1) -#define C_0286CC_PERSP_GRADIENT_ENA 0xEFFFFFFF -#define S_0286CC_LINEAR_GRADIENT_ENA(x) (((x) & 0x1) << 29) -#define G_0286CC_LINEAR_GRADIENT_ENA(x) (((x) >> 29) & 0x1) -#define C_0286CC_LINEAR_GRADIENT_ENA 0xDFFFFFFF -#define S_0286CC_POSITION_SAMPLE(x) (((x) & 0x1) << 30) -#define G_0286CC_POSITION_SAMPLE(x) (((x) >> 30) & 0x1) -#define C_0286CC_POSITION_SAMPLE 0xBFFFFFFF -#define S_0286CC_BARYC_AT_SAMPLE_ENA(x) (((x) & 0x1) << 31) -#define G_0286CC_BARYC_AT_SAMPLE_ENA(x) (((x) >> 31) & 0x1) -#define C_0286CC_BARYC_AT_SAMPLE_ENA 0x7FFFFFFF -#define R_0286D0_SPI_PS_IN_CONTROL_1 0x0286D0 -#define S_0286D0_GEN_INDEX_PIX(x) (((x) & 0x1) << 0) -#define G_0286D0_GEN_INDEX_PIX(x) (((x) >> 0) & 0x1) -#define C_0286D0_GEN_INDEX_PIX 0xFFFFFFFE -#define S_0286D0_GEN_INDEX_PIX_ADDR(x) (((x) & 0x7F) << 1) -#define G_0286D0_GEN_INDEX_PIX_ADDR(x) (((x) >> 1) & 0x7F) -#define C_0286D0_GEN_INDEX_PIX_ADDR 0xFFFFFF01 
-#define S_0286D0_FRONT_FACE_ENA(x) (((x) & 0x1) << 8) -#define G_0286D0_FRONT_FACE_ENA(x) (((x) >> 8) & 0x1) -#define C_0286D0_FRONT_FACE_ENA 0xFFFFFEFF -#define S_0286D0_FRONT_FACE_CHAN(x) (((x) & 0x3) << 9) -#define G_0286D0_FRONT_FACE_CHAN(x) (((x) >> 9) & 0x3) -#define C_0286D0_FRONT_FACE_CHAN 0xFFFFF9FF -#define S_0286D0_FRONT_FACE_ALL_BITS(x) (((x) & 0x1) << 11) -#define G_0286D0_FRONT_FACE_ALL_BITS(x) (((x) >> 11) & 0x1) -#define C_0286D0_FRONT_FACE_ALL_BITS 0xFFFFF7FF -#define S_0286D0_FRONT_FACE_ADDR(x) (((x) & 0x1F) << 12) -#define G_0286D0_FRONT_FACE_ADDR(x) (((x) >> 12) & 0x1F) -#define C_0286D0_FRONT_FACE_ADDR 0xFFFE0FFF -#define S_0286D0_FOG_ADDR(x) (((x) & 0x7F) << 17) -#define G_0286D0_FOG_ADDR(x) (((x) >> 17) & 0x7F) -#define C_0286D0_FOG_ADDR 0xFF01FFFF -#define S_0286D0_FIXED_PT_POSITION_ENA(x) (((x) & 0x1) << 24) -#define G_0286D0_FIXED_PT_POSITION_ENA(x) (((x) >> 24) & 0x1) -#define C_0286D0_FIXED_PT_POSITION_ENA 0xFEFFFFFF -#define S_0286D0_FIXED_PT_POSITION_ADDR(x) (((x) & 0x1F) << 25) -#define G_0286D0_FIXED_PT_POSITION_ADDR(x) (((x) >> 25) & 0x1F) -#define C_0286D0_FIXED_PT_POSITION_ADDR 0xC1FFFFFF -#define R_0286C4_SPI_VS_OUT_CONFIG 0x0286C4 -#define S_0286C4_VS_PER_COMPONENT(x) (((x) & 0x1) << 0) -#define G_0286C4_VS_PER_COMPONENT(x) (((x) >> 0) & 0x1) -#define C_0286C4_VS_PER_COMPONENT 0xFFFFFFFE -#define S_0286C4_VS_EXPORT_COUNT(x) (((x) & 0x1F) << 1) -#define G_0286C4_VS_EXPORT_COUNT(x) (((x) >> 1) & 0x1F) -#define C_0286C4_VS_EXPORT_COUNT 0xFFFFFFC1 -#define S_0286C4_VS_EXPORTS_FOG(x) (((x) & 0x1) << 8) -#define G_0286C4_VS_EXPORTS_FOG(x) (((x) >> 8) & 0x1) -#define C_0286C4_VS_EXPORTS_FOG 0xFFFFFEFF -#define S_0286C4_VS_OUT_FOG_VEC_ADDR(x) (((x) & 0x1F) << 9) -#define G_0286C4_VS_OUT_FOG_VEC_ADDR(x) (((x) >> 9) & 0x1F) -#define C_0286C4_VS_OUT_FOG_VEC_ADDR 0xFFFFC1FF -#define R_028240_PA_SC_GENERIC_SCISSOR_TL 0x028240 -#define S_028240_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028240_TL_X(x) (((x) >> 0) & 0x3FFF) -#define 
C_028240_TL_X 0xFFFFC000 -#define S_028240_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028240_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028240_TL_Y 0xC000FFFF -#define S_028240_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028240_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028240_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028244_PA_SC_GENERIC_SCISSOR_BR 0x028244 -#define S_028244_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028244_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028244_BR_X 0xFFFFC000 -#define S_028244_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028244_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028244_BR_Y 0xC000FFFF -#define R_028030_PA_SC_SCREEN_SCISSOR_TL 0x028030 -#define S_028030_TL_X(x) (((x) & 0x7FFF) << 0) -#define G_028030_TL_X(x) (((x) >> 0) & 0x7FFF) -#define C_028030_TL_X 0xFFFF8000 -#define S_028030_TL_Y(x) (((x) & 0x7FFF) << 16) -#define G_028030_TL_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028030_TL_Y 0x8000FFFF -#define R_028034_PA_SC_SCREEN_SCISSOR_BR 0x028034 -#define S_028034_BR_X(x) (((x) & 0x7FFF) << 0) -#define G_028034_BR_X(x) (((x) >> 0) & 0x7FFF) -#define C_028034_BR_X 0xFFFF8000 -#define S_028034_BR_Y(x) (((x) & 0x7FFF) << 16) -#define G_028034_BR_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028034_BR_Y 0x8000FFFF -#define R_028204_PA_SC_WINDOW_SCISSOR_TL 0x028204 -#define S_028204_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028204_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028204_TL_X 0xFFFFC000 -#define S_028204_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028204_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028204_TL_Y 0xC000FFFF -#define S_028204_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028204_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028204_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028208_PA_SC_WINDOW_SCISSOR_BR 0x028208 -#define S_028208_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028208_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028208_BR_X 0xFFFFC000 -#define S_028208_BR_Y(x) (((x) & 0x3FFF) << 16) 
-#define G_028208_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028208_BR_Y 0xC000FFFF -#define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 -#define S_0287F0_SOURCE_SELECT(x) (((x) & 0x3) << 0) -#define G_0287F0_SOURCE_SELECT(x) (((x) >> 0) & 0x3) -#define C_0287F0_SOURCE_SELECT 0xFFFFFFFC -#define S_0287F0_MAJOR_MODE(x) (((x) & 0x3) << 2) -#define G_0287F0_MAJOR_MODE(x) (((x) >> 2) & 0x3) -#define C_0287F0_MAJOR_MODE 0xFFFFFFF3 -#define S_0287F0_SPRITE_EN(x) (((x) & 0x1) << 4) -#define G_0287F0_SPRITE_EN(x) (((x) >> 4) & 0x1) -#define C_0287F0_SPRITE_EN 0xFFFFFFEF -#define S_0287F0_NOT_EOP(x) (((x) & 0x1) << 5) -#define G_0287F0_NOT_EOP(x) (((x) >> 5) & 0x1) -#define C_0287F0_NOT_EOP 0xFFFFFFDF -#define S_0287F0_USE_OPAQUE(x) (((x) & 0x1) << 6) -#define G_0287F0_USE_OPAQUE(x) (((x) >> 6) & 0x1) -#define C_0287F0_USE_OPAQUE 0xFFFFFFBF -#define R_0280A0_CB_COLOR0_INFO 0x0280A0 -#define R_0280A4_CB_COLOR1_INFO 0x0280A4 -#define R_0280A8_CB_COLOR2_INFO 0x0280A8 -#define R_0280AC_CB_COLOR3_INFO 0x0280AC -#define R_0280B0_CB_COLOR4_INFO 0x0280B0 -#define R_0280B4_CB_COLOR5_INFO 0x0280B4 -#define R_0280B8_CB_COLOR6_INFO 0x0280B8 -#define R_0280BC_CB_COLOR7_INFO 0x0280BC -#define R_02800C_DB_DEPTH_BASE 0x02800C -#define R_028000_DB_DEPTH_SIZE 0x028000 -#define R_028004_DB_DEPTH_VIEW 0x028004 -#define R_028010_DB_DEPTH_INFO 0x028010 -#define R_028D24_DB_HTILE_SURFACE 0x028D24 -#define R_028D34_DB_PREFETCH_LIMIT 0x028D34 -#define R_0286D4_SPI_INTERP_CONTROL_0 0x0286D4 -#define R_028A48_PA_SC_MPASS_PS_CNTL 0x028A48 -#define R_028C00_PA_SC_LINE_CNTL 0x028C00 -#define R_028C04_PA_SC_AA_CONFIG 0x028C04 -#define R_028C08_PA_SU_VTX_CNTL 0x028C08 -#define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX 0x028C1C -#define R_028C48_PA_SC_AA_MASK 0x028C48 -#define R_028810_PA_CL_CLIP_CNTL 0x028810 -#define R_02881C_PA_CL_VS_OUT_CNTL 0x02881C -#define R_028820_PA_CL_NANINF_CNTL 0x028820 -#define R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x028C0C -#define R_028C10_PA_CL_GB_VERT_DISC_ADJ 0x028C10 -#define 
R_028C14_PA_CL_GB_HORZ_CLIP_ADJ 0x028C14 -#define R_028C18_PA_CL_GB_HORZ_DISC_ADJ 0x028C18 -#define R_028814_PA_SU_SC_MODE_CNTL 0x028814 -#define R_028A00_PA_SU_POINT_SIZE 0x028A00 -#define R_028A04_PA_SU_POINT_MINMAX 0x028A04 -#define R_028A08_PA_SU_LINE_CNTL 0x028A08 -#define R_028A0C_PA_SC_LINE_STIPPLE 0x028A0C -#define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 -#define R_028DFC_PA_SU_POLY_OFFSET_CLAMP 0x028DFC -#define R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE 0x028E00 -#define R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET 0x028E04 -#define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 -#define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C -#define R_028818_PA_CL_VTE_CNTL 0x028818 -#define R_02843C_PA_CL_VPORT_XSCALE_0 0x02843C -#define R_028444_PA_CL_VPORT_YSCALE_0 0x028444 -#define R_02844C_PA_CL_VPORT_ZSCALE_0 0x02844C -#define R_028440_PA_CL_VPORT_XOFFSET_0 0x028440 -#define R_028448_PA_CL_VPORT_YOFFSET_0 0x028448 -#define R_028450_PA_CL_VPORT_ZOFFSET_0 0x028450 -#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250 -#define R_028254_PA_SC_VPORT_SCISSOR_0_BR 0x028254 -#define R_028780_CB_BLEND0_CONTROL 0x028780 -#define R_028784_CB_BLEND1_CONTROL 0x028784 -#define R_028788_CB_BLEND2_CONTROL 0x028788 -#define R_02878C_CB_BLEND3_CONTROL 0x02878C -#define R_028790_CB_BLEND4_CONTROL 0x028790 -#define R_028794_CB_BLEND5_CONTROL 0x028794 -#define R_028798_CB_BLEND6_CONTROL 0x028798 -#define R_02879C_CB_BLEND7_CONTROL 0x02879C -#define R_028804_CB_BLEND_CONTROL 0x028804 -#define R_028028_DB_STENCIL_CLEAR 0x028028 -#define R_02802C_DB_DEPTH_CLEAR 0x02802C -#define R_028430_DB_STENCILREFMASK 0x028430 -#define R_028434_DB_STENCILREFMASK_BF 0x028434 -#define R_028800_DB_DEPTH_CONTROL 0x028800 -#define R_02880C_DB_SHADER_CONTROL 0x02880C -#define R_028D0C_DB_RENDER_CONTROL 0x028D0C -#define S_028D0C_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0) -#define S_028D0C_STENCIL_CLEAR_ENABLE(x) (((x) & 0x1) << 1) -#define S_028D0C_DEPTH_COPY_ENABLE(x) (((x) & 0x1) << 2) -#define 
S_028D0C_STENCIL_COPY_ENABLE(x) (((x) & 0x1) << 3) -#define S_028D0C_RESUMMARIZE_ENABLE(x) (((x) & 0x1) << 4) -#define S_028D0C_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) -#define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) -#define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7) -#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x1) << 8) -#define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15) -#define R_028D10_DB_RENDER_OVERRIDE 0x028D10 -#define R_028D2C_DB_SRESULTS_COMPARE_STATE1 0x028D2C -#define R_028D30_DB_PRELOAD_CONTROL 0x028D30 -#define R_028D44_DB_ALPHA_TO_MASK 0x028D44 -#define R_028868_SQ_PGM_RESOURCES_VS 0x028868 -#define R_0286CC_SPI_PS_IN_CONTROL_0 0x0286CC -#define R_0286D0_SPI_PS_IN_CONTROL_1 0x0286D0 -#define R_028644_SPI_PS_INPUT_CNTL_0 0x028644 -#define R_028648_SPI_PS_INPUT_CNTL_1 0x028648 -#define R_02864C_SPI_PS_INPUT_CNTL_2 0x02864C -#define R_028650_SPI_PS_INPUT_CNTL_3 0x028650 -#define R_028654_SPI_PS_INPUT_CNTL_4 0x028654 -#define R_028658_SPI_PS_INPUT_CNTL_5 0x028658 -#define R_02865C_SPI_PS_INPUT_CNTL_6 0x02865C -#define R_028660_SPI_PS_INPUT_CNTL_7 0x028660 -#define R_028664_SPI_PS_INPUT_CNTL_8 0x028664 -#define R_028668_SPI_PS_INPUT_CNTL_9 0x028668 -#define R_02866C_SPI_PS_INPUT_CNTL_10 0x02866C -#define R_028670_SPI_PS_INPUT_CNTL_11 0x028670 -#define R_028674_SPI_PS_INPUT_CNTL_12 0x028674 -#define R_028678_SPI_PS_INPUT_CNTL_13 0x028678 -#define R_02867C_SPI_PS_INPUT_CNTL_14 0x02867C -#define R_028680_SPI_PS_INPUT_CNTL_15 0x028680 -#define R_028684_SPI_PS_INPUT_CNTL_16 0x028684 -#define R_028688_SPI_PS_INPUT_CNTL_17 0x028688 -#define R_02868C_SPI_PS_INPUT_CNTL_18 0x02868C -#define R_028690_SPI_PS_INPUT_CNTL_19 0x028690 -#define R_028694_SPI_PS_INPUT_CNTL_20 0x028694 -#define R_028698_SPI_PS_INPUT_CNTL_21 0x028698 -#define R_02869C_SPI_PS_INPUT_CNTL_22 0x02869C -#define R_0286A0_SPI_PS_INPUT_CNTL_23 0x0286A0 -#define R_0286A4_SPI_PS_INPUT_CNTL_24 0x0286A4 -#define R_0286A8_SPI_PS_INPUT_CNTL_25 0x0286A8 -#define 
R_0286AC_SPI_PS_INPUT_CNTL_26 0x0286AC -#define R_0286B0_SPI_PS_INPUT_CNTL_27 0x0286B0 -#define R_0286B4_SPI_PS_INPUT_CNTL_28 0x0286B4 -#define R_0286B8_SPI_PS_INPUT_CNTL_29 0x0286B8 -#define R_0286BC_SPI_PS_INPUT_CNTL_30 0x0286BC -#define R_0286C0_SPI_PS_INPUT_CNTL_31 0x0286C0 -#define R_028850_SQ_PGM_RESOURCES_PS 0x028850 -#define R_028854_SQ_PGM_EXPORTS_PS 0x028854 -#define R_008958_VGT_PRIMITIVE_TYPE 0x008958 -#define R_028A7C_VGT_DMA_INDEX_TYPE 0x028A7C -#define R_028A88_VGT_DMA_NUM_INSTANCES 0x028A88 -#define R_008970_VGT_NUM_INDICES 0x008970 -#define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 -#define R_028238_CB_TARGET_MASK 0x028238 -#define R_02823C_CB_SHADER_MASK 0x02823C -#define R_028060_CB_COLOR0_SIZE 0x028060 -#define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028060_PITCH_TILE_MAX 0xFFFFFC00 -#define S_028060_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) -#define G_028060_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) -#define C_028060_SLICE_TILE_MAX 0xC00003FF -#define R_028064_CB_COLOR1_SIZE 0x028064 -#define R_028068_CB_COLOR2_SIZE 0x028068 -#define R_02806C_CB_COLOR3_SIZE 0x02806C -#define R_028070_CB_COLOR4_SIZE 0x028070 -#define R_028074_CB_COLOR5_SIZE 0x028074 -#define R_028078_CB_COLOR6_SIZE 0x028078 -#define R_02807C_CB_COLOR7_SIZE 0x02807C -#define R_028040_CB_COLOR0_BASE 0x028040 -#define R_028044_CB_COLOR1_BASE 0x028044 -#define R_028048_CB_COLOR2_BASE 0x028048 -#define R_02804C_CB_COLOR3_BASE 0x02804C -#define R_028050_CB_COLOR4_BASE 0x028050 -#define R_028054_CB_COLOR5_BASE 0x028054 -#define R_028058_CB_COLOR6_BASE 0x028058 -#define R_02805C_CB_COLOR7_BASE 0x02805C -#define R_028240_PA_SC_GENERIC_SCISSOR_TL 0x028240 -#define S_028240_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028240_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028240_TL_X 0xFFFFC000 -#define S_028240_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028240_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028240_TL_Y 0xC000FFFF 
-#define R_028C04_PA_SC_AA_CONFIG 0x028C04 -#define S_028C04_MSAA_NUM_SAMPLES(x) (((x) & 0x3) << 0) -#define G_028C04_MSAA_NUM_SAMPLES(x) (((x) >> 0) & 0x3) -#define C_028C04_MSAA_NUM_SAMPLES 0xFFFFFFFC -#define S_028C04_AA_MASK_CENTROID_DTMN(x) (((x) & 0x1) << 4) -#define G_028C04_AA_MASK_CENTROID_DTMN(x) (((x) >> 4) & 0x1) -#define C_028C04_AA_MASK_CENTROID_DTMN 0xFFFFFFEF -#define S_028C04_MAX_SAMPLE_DIST(x) (((x) & 0xF) << 13) -#define G_028C04_MAX_SAMPLE_DIST(x) (((x) >> 13) & 0xF) -#define C_028C04_MAX_SAMPLE_DIST 0xFFFE1FFF -#define R_0288CC_SQ_PGM_CF_OFFSET_PS 0x0288CC -#define R_0288DC_SQ_PGM_CF_OFFSET_FS 0x0288DC -#define R_0288D0_SQ_PGM_CF_OFFSET_VS 0x0288D0 -#define R_028840_SQ_PGM_START_PS 0x028840 -#define R_028894_SQ_PGM_START_FS 0x028894 -#define R_028858_SQ_PGM_START_VS 0x028858 -#define R_028080_CB_COLOR0_VIEW 0x028080 -#define S_028080_SLICE_START(x) (((x) & 0x7FF) << 0) -#define G_028080_SLICE_START(x) (((x) >> 0) & 0x7FF) -#define C_028080_SLICE_START 0xFFFFF800 -#define S_028080_SLICE_MAX(x) (((x) & 0x7FF) << 13) -#define G_028080_SLICE_MAX(x) (((x) >> 13) & 0x7FF) -#define C_028080_SLICE_MAX 0xFF001FFF -#define R_028084_CB_COLOR1_VIEW 0x028084 -#define R_028088_CB_COLOR2_VIEW 0x028088 -#define R_02808C_CB_COLOR3_VIEW 0x02808C -#define R_028090_CB_COLOR4_VIEW 0x028090 -#define R_028094_CB_COLOR5_VIEW 0x028094 -#define R_028098_CB_COLOR6_VIEW 0x028098 -#define R_02809C_CB_COLOR7_VIEW 0x02809C -#define R_028100_CB_COLOR0_MASK 0x028100 -#define S_028100_CMASK_BLOCK_MAX(x) (((x) & 0xFFF) << 0) -#define G_028100_CMASK_BLOCK_MAX(x) (((x) >> 0) & 0xFFF) -#define C_028100_CMASK_BLOCK_MAX 0xFFFFF000 -#define S_028100_FMASK_TILE_MAX(x) (((x) & 0xFFFFF) << 12) -#define G_028100_FMASK_TILE_MAX(x) (((x) >> 12) & 0xFFFFF) -#define C_028100_FMASK_TILE_MAX 0x00000FFF -#define R_028104_CB_COLOR1_MASK 0x028104 -#define R_028108_CB_COLOR2_MASK 0x028108 -#define R_02810C_CB_COLOR3_MASK 0x02810C -#define R_028110_CB_COLOR4_MASK 0x028110 -#define 
R_028114_CB_COLOR5_MASK 0x028114 -#define R_028118_CB_COLOR6_MASK 0x028118 -#define R_02811C_CB_COLOR7_MASK 0x02811C -#define R_028040_CB_COLOR0_BASE 0x028040 -#define S_028040_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028040_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028040_BASE_256B 0x00000000 -#define R_0280E0_CB_COLOR0_FRAG 0x0280E0 -#define S_0280E0_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0280E0_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0280E0_BASE_256B 0x00000000 -#define R_0280E4_CB_COLOR1_FRAG 0x0280E4 -#define R_0280E8_CB_COLOR2_FRAG 0x0280E8 -#define R_0280EC_CB_COLOR3_FRAG 0x0280EC -#define R_0280F0_CB_COLOR4_FRAG 0x0280F0 -#define R_0280F4_CB_COLOR5_FRAG 0x0280F4 -#define R_0280F8_CB_COLOR6_FRAG 0x0280F8 -#define R_0280FC_CB_COLOR7_FRAG 0x0280FC -#define R_0280C0_CB_COLOR0_TILE 0x0280C0 -#define S_0280C0_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0280C0_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0280C0_BASE_256B 0x00000000 -#define R_0280C4_CB_COLOR1_TILE 0x0280C4 -#define R_0280C8_CB_COLOR2_TILE 0x0280C8 -#define R_0280CC_CB_COLOR3_TILE 0x0280CC -#define R_0280D0_CB_COLOR4_TILE 0x0280D0 -#define R_0280D4_CB_COLOR5_TILE 0x0280D4 -#define R_0280D8_CB_COLOR6_TILE 0x0280D8 -#define R_0280DC_CB_COLOR7_TILE 0x0280DC -#define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD -#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028808_DITHER_ENABLE 0xFFFFFFFB -#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) -#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) -#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 -#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4) 
-#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7) -#define C_028808_SPECIAL_OP 0xFFFFFF8F -#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7) -#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1) -#define C_028808_PER_MRT_BLEND 0xFFFFFF7F -#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8) -#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF) -#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF -#define S_028808_ROP3(x) (((x) & 0xFF) << 16) -#define G_028808_ROP3(x) (((x) >> 16) & 0xFF) -#define C_028808_ROP3 0xFF00FFFF -#define R_028614_SPI_VS_OUT_ID_0 0x028614 -#define S_028614_SEMANTIC_0(x) (((x) & 0xFF) << 0) -#define G_028614_SEMANTIC_0(x) (((x) >> 0) & 0xFF) -#define C_028614_SEMANTIC_0 0xFFFFFF00 -#define S_028614_SEMANTIC_1(x) (((x) & 0xFF) << 8) -#define G_028614_SEMANTIC_1(x) (((x) >> 8) & 0xFF) -#define C_028614_SEMANTIC_1 0xFFFF00FF -#define S_028614_SEMANTIC_2(x) (((x) & 0xFF) << 16) -#define G_028614_SEMANTIC_2(x) (((x) >> 16) & 0xFF) -#define C_028614_SEMANTIC_2 0xFF00FFFF -#define S_028614_SEMANTIC_3(x) (((x) & 0xFF) << 24) -#define G_028614_SEMANTIC_3(x) (((x) >> 24) & 0xFF) -#define C_028614_SEMANTIC_3 0x00FFFFFF -#define R_028618_SPI_VS_OUT_ID_1 0x028618 -#define R_02861C_SPI_VS_OUT_ID_2 0x02861C -#define R_028620_SPI_VS_OUT_ID_3 0x028620 -#define R_028624_SPI_VS_OUT_ID_4 0x028624 -#define R_028628_SPI_VS_OUT_ID_5 0x028628 -#define R_02862C_SPI_VS_OUT_ID_6 0x02862C -#define R_028630_SPI_VS_OUT_ID_7 0x028630 -#define R_028634_SPI_VS_OUT_ID_8 0x028634 -#define R_028638_SPI_VS_OUT_ID_9 0x028638 -#define R_038000_SQ_TEX_RESOURCE_WORD0_0 0x038000 -#define S_038000_DIM(x) (((x) & 0x7) << 0) -#define G_038000_DIM(x) (((x) >> 0) & 0x7) -#define C_038000_DIM 0xFFFFFFF8 -#define S_038000_TILE_MODE(x) (((x) & 0xF) << 3) -#define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF) -#define C_038000_TILE_MODE 0xFFFFFF87 -#define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7) -#define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1) -#define C_038000_TILE_TYPE 
0xFFFFFF7F -#define S_038000_PITCH(x) (((x) & 0x7FF) << 8) -#define G_038000_PITCH(x) (((x) >> 8) & 0x7FF) -#define C_038000_PITCH 0xFFF800FF -#define S_038000_TEX_WIDTH(x) (((x) & 0x1FFF) << 19) -#define G_038000_TEX_WIDTH(x) (((x) >> 19) & 0x1FFF) -#define C_038000_TEX_WIDTH 0x0007FFFF -#define R_038004_SQ_TEX_RESOURCE_WORD1_0 0x038004 -#define S_038004_TEX_HEIGHT(x) (((x) & 0x1FFF) << 0) -#define G_038004_TEX_HEIGHT(x) (((x) >> 0) & 0x1FFF) -#define C_038004_TEX_HEIGHT 0xFFFFE000 -#define S_038004_TEX_DEPTH(x) (((x) & 0x1FFF) << 13) -#define G_038004_TEX_DEPTH(x) (((x) >> 13) & 0x1FFF) -#define C_038004_TEX_DEPTH 0xFC001FFF -#define S_038004_DATA_FORMAT(x) (((x) & 0x3F) << 26) -#define G_038004_DATA_FORMAT(x) (((x) >> 26) & 0x3F) -#define C_038004_DATA_FORMAT 0x03FFFFFF -#define V_038004_COLOR_INVALID 0x00000000 -#define V_038004_COLOR_8 0x00000001 -#define V_038004_COLOR_4_4 0x00000002 -#define V_038004_COLOR_3_3_2 0x00000003 -#define V_038004_COLOR_16 0x00000005 -#define V_038004_COLOR_16_FLOAT 0x00000006 -#define V_038004_COLOR_8_8 0x00000007 -#define V_038004_COLOR_5_6_5 0x00000008 -#define V_038004_COLOR_6_5_5 0x00000009 -#define V_038004_COLOR_1_5_5_5 0x0000000A -#define V_038004_COLOR_4_4_4_4 0x0000000B -#define V_038004_COLOR_5_5_5_1 0x0000000C -#define V_038004_COLOR_32 0x0000000D -#define V_038004_COLOR_32_FLOAT 0x0000000E -#define V_038004_COLOR_16_16 0x0000000F -#define V_038004_COLOR_16_16_FLOAT 0x00000010 -#define V_038004_COLOR_8_24 0x00000011 -#define V_038004_COLOR_8_24_FLOAT 0x00000012 -#define V_038004_COLOR_24_8 0x00000013 -#define V_038004_COLOR_24_8_FLOAT 0x00000014 -#define V_038004_COLOR_10_11_11 0x00000015 -#define V_038004_COLOR_10_11_11_FLOAT 0x00000016 -#define V_038004_COLOR_11_11_10 0x00000017 -#define V_038004_COLOR_11_11_10_FLOAT 0x00000018 -#define V_038004_COLOR_2_10_10_10 0x00000019 -#define V_038004_COLOR_8_8_8_8 0x0000001A -#define V_038004_COLOR_10_10_10_2 0x0000001B -#define V_038004_COLOR_X24_8_32_FLOAT 0x0000001C -#define 
V_038004_COLOR_32_32 0x0000001D -#define V_038004_COLOR_32_32_FLOAT 0x0000001E -#define V_038004_COLOR_16_16_16_16 0x0000001F -#define V_038004_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_038004_COLOR_32_32_32_32 0x00000022 -#define V_038004_COLOR_32_32_32_32_FLOAT 0x00000023 -#define R_038008_SQ_TEX_RESOURCE_WORD2_0 0x038008 -#define S_038008_BASE_ADDRESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_038008_BASE_ADDRESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_038008_BASE_ADDRESS 0x00000000 -#define R_03800C_SQ_TEX_RESOURCE_WORD3_0 0x03800C -#define S_03800C_MIP_ADDRESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_03800C_MIP_ADDRESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_03800C_MIP_ADDRESS 0x00000000 -#define R_038010_SQ_TEX_RESOURCE_WORD4_0 0x038010 -#define S_038010_FORMAT_COMP_X(x) (((x) & 0x3) << 0) -#define G_038010_FORMAT_COMP_X(x) (((x) >> 0) & 0x3) -#define C_038010_FORMAT_COMP_X 0xFFFFFFFC -#define S_038010_FORMAT_COMP_Y(x) (((x) & 0x3) << 2) -#define G_038010_FORMAT_COMP_Y(x) (((x) >> 2) & 0x3) -#define C_038010_FORMAT_COMP_Y 0xFFFFFFF3 -#define S_038010_FORMAT_COMP_Z(x) (((x) & 0x3) << 4) -#define G_038010_FORMAT_COMP_Z(x) (((x) >> 4) & 0x3) -#define C_038010_FORMAT_COMP_Z 0xFFFFFFCF -#define S_038010_FORMAT_COMP_W(x) (((x) & 0x3) << 6) -#define G_038010_FORMAT_COMP_W(x) (((x) >> 6) & 0x3) -#define C_038010_FORMAT_COMP_W 0xFFFFFF3F -#define S_038010_NUM_FORMAT_ALL(x) (((x) & 0x3) << 8) -#define G_038010_NUM_FORMAT_ALL(x) (((x) >> 8) & 0x3) -#define C_038010_NUM_FORMAT_ALL 0xFFFFFCFF -#define S_038010_SRF_MODE_ALL(x) (((x) & 0x1) << 10) -#define G_038010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1) -#define C_038010_SRF_MODE_ALL 0xFFFFFBFF -#define S_038010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11) -#define G_038010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1) -#define C_038010_FORCE_DEGAMMA 0xFFFFF7FF -#define S_038010_ENDIAN_SWAP(x) (((x) & 0x3) << 12) -#define G_038010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3) -#define C_038010_ENDIAN_SWAP 0xFFFFCFFF -#define S_038010_REQUEST_SIZE(x) 
(((x) & 0x3) << 14) -#define G_038010_REQUEST_SIZE(x) (((x) >> 14) & 0x3) -#define C_038010_REQUEST_SIZE 0xFFFF3FFF -#define S_038010_DST_SEL_X(x) (((x) & 0x7) << 16) -#define G_038010_DST_SEL_X(x) (((x) >> 16) & 0x7) -#define C_038010_DST_SEL_X 0xFFF8FFFF -#define S_038010_DST_SEL_Y(x) (((x) & 0x7) << 19) -#define G_038010_DST_SEL_Y(x) (((x) >> 19) & 0x7) -#define C_038010_DST_SEL_Y 0xFFC7FFFF -#define S_038010_DST_SEL_Z(x) (((x) & 0x7) << 22) -#define G_038010_DST_SEL_Z(x) (((x) >> 22) & 0x7) -#define C_038010_DST_SEL_Z 0xFE3FFFFF -#define S_038010_DST_SEL_W(x) (((x) & 0x7) << 25) -#define G_038010_DST_SEL_W(x) (((x) >> 25) & 0x7) -#define C_038010_DST_SEL_W 0xF1FFFFFF -#define S_038010_BASE_LEVEL(x) (((x) & 0xF) << 28) -#define G_038010_BASE_LEVEL(x) (((x) >> 28) & 0xF) -#define C_038010_BASE_LEVEL 0x0FFFFFFF -#define R_038014_SQ_TEX_RESOURCE_WORD5_0 0x038014 -#define S_038014_LAST_LEVEL(x) (((x) & 0xF) << 0) -#define G_038014_LAST_LEVEL(x) (((x) >> 0) & 0xF) -#define C_038014_LAST_LEVEL 0xFFFFFFF0 -#define S_038014_BASE_ARRAY(x) (((x) & 0x1FFF) << 4) -#define G_038014_BASE_ARRAY(x) (((x) >> 4) & 0x1FFF) -#define C_038014_BASE_ARRAY 0xFFFE000F -#define S_038014_LAST_ARRAY(x) (((x) & 0x1FFF) << 17) -#define G_038014_LAST_ARRAY(x) (((x) >> 17) & 0x1FFF) -#define C_038014_LAST_ARRAY 0xC001FFFF -#define R_038018_SQ_TEX_RESOURCE_WORD6_0 0x038018 -#define S_038018_MPEG_CLAMP(x) (((x) & 0x3) << 0) -#define G_038018_MPEG_CLAMP(x) (((x) >> 0) & 0x3) -#define C_038018_MPEG_CLAMP 0xFFFFFFFC -#define S_038018_PERF_MODULATION(x) (((x) & 0x7) << 5) -#define G_038018_PERF_MODULATION(x) (((x) >> 5) & 0x7) -#define C_038018_PERF_MODULATION 0xFFFFFF1F -#define S_038018_INTERLACED(x) (((x) & 0x1) << 8) -#define G_038018_INTERLACED(x) (((x) >> 8) & 0x1) -#define C_038018_INTERLACED 0xFFFFFEFF -#define S_038018_TYPE(x) (((x) & 0x3) << 30) -#define G_038018_TYPE(x) (((x) >> 30) & 0x3) -#define C_038018_TYPE 0x3FFFFFFF -#define R_008040_WAIT_UNTIL 0x008040 -#define 
S_008040_WAIT_CP_DMA_IDLE(x) (((x) & 0x1) << 8) -#define G_008040_WAIT_CP_DMA_IDLE(x) (((x) >> 8) & 0x1) -#define C_008040_WAIT_CP_DMA_IDLE 0xFFFFFEFF -#define S_008040_WAIT_CMDFIFO(x) (((x) & 0x1) << 10) -#define G_008040_WAIT_CMDFIFO(x) (((x) >> 10) & 0x1) -#define C_008040_WAIT_CMDFIFO 0xFFFFFBFF -#define S_008040_WAIT_2D_IDLE(x) (((x) & 0x1) << 14) -#define G_008040_WAIT_2D_IDLE(x) (((x) >> 14) & 0x1) -#define C_008040_WAIT_2D_IDLE 0xFFFFBFFF -#define S_008040_WAIT_3D_IDLE(x) (((x) & 0x1) << 15) -#define G_008040_WAIT_3D_IDLE(x) (((x) >> 15) & 0x1) -#define C_008040_WAIT_3D_IDLE 0xFFFF7FFF -#define S_008040_WAIT_2D_IDLECLEAN(x) (((x) & 0x1) << 16) -#define G_008040_WAIT_2D_IDLECLEAN(x) (((x) >> 16) & 0x1) -#define C_008040_WAIT_2D_IDLECLEAN 0xFFFEFFFF -#define S_008040_WAIT_3D_IDLECLEAN(x) (((x) & 0x1) << 17) -#define G_008040_WAIT_3D_IDLECLEAN(x) (((x) >> 17) & 0x1) -#define C_008040_WAIT_3D_IDLECLEAN 0xFFFDFFFF -#define S_008040_WAIT_EXTERN_SIG(x) (((x) & 0x1) << 19) -#define G_008040_WAIT_EXTERN_SIG(x) (((x) >> 19) & 0x1) -#define C_008040_WAIT_EXTERN_SIG 0xFFF7FFFF -#define S_008040_CMDFIFO_ENTRIES(x) (((x) & 0x1F) << 20) -#define G_008040_CMDFIFO_ENTRIES(x) (((x) >> 20) & 0x1F) -#define C_008040_CMDFIFO_ENTRIES 0xFE0FFFFF -#define R_008958_VGT_PRIMITIVE_TYPE 0x008958 -#define S_008958_PRIM_TYPE(x) (((x) & 0x3F) << 0) -#define G_008958_PRIM_TYPE(x) (((x) >> 0) & 0x3F) -#define C_008958_PRIM_TYPE 0xFFFFFFC0 -#define R_008C00_SQ_CONFIG 0x008C00 -#define S_008C00_VC_ENABLE(x) (((x) & 0x1) << 0) -#define G_008C00_VC_ENABLE(x) (((x) >> 0) & 0x1) -#define C_008C00_VC_ENABLE 0xFFFFFFFE -#define S_008C00_EXPORT_SRC_C(x) (((x) & 0x1) << 1) -#define G_008C00_EXPORT_SRC_C(x) (((x) >> 1) & 0x1) -#define C_008C00_EXPORT_SRC_C 0xFFFFFFFD -#define S_008C00_DX9_CONSTS(x) (((x) & 0x1) << 2) -#define G_008C00_DX9_CONSTS(x) (((x) >> 2) & 0x1) -#define C_008C00_DX9_CONSTS 0xFFFFFFFB -#define S_008C00_ALU_INST_PREFER_VECTOR(x) (((x) & 0x1) << 3) -#define 
G_008C00_ALU_INST_PREFER_VECTOR(x) (((x) >> 3) & 0x1) -#define C_008C00_ALU_INST_PREFER_VECTOR 0xFFFFFFF7 -#define S_008C00_DX10_CLAMP(x) (((x) & 0x1) << 4) -#define G_008C00_DX10_CLAMP(x) (((x) >> 4) & 0x1) -#define C_008C00_DX10_CLAMP 0xFFFFFFEF -#define S_008C00_ALU_PREFER_ONE_WATERFALL(x) (((x) & 0x1) << 5) -#define G_008C00_ALU_PREFER_ONE_WATERFALL(x) (((x) >> 5) & 0x1) -#define C_008C00_ALU_PREFER_ONE_WATERFALL 0xFFFFFFDF -#define S_008C00_ALU_MAX_ONE_WATERFALL(x) (((x) & 0x1) << 6) -#define G_008C00_ALU_MAX_ONE_WATERFALL(x) (((x) >> 6) & 0x1) -#define C_008C00_ALU_MAX_ONE_WATERFALL 0xFFFFFFBF -#define S_008C00_CLAUSE_SEQ_PRIO(x) (((x) & 0x3) << 8) -#define G_008C00_CLAUSE_SEQ_PRIO(x) (((x) >> 8) & 0x3) -#define C_008C00_CLAUSE_SEQ_PRIO 0xFFFFFCFF -#define S_008C00_PS_PRIO(x) (((x) & 0x3) << 24) -#define G_008C00_PS_PRIO(x) (((x) >> 24) & 0x3) -#define C_008C00_PS_PRIO 0xFCFFFFFF -#define S_008C00_VS_PRIO(x) (((x) & 0x3) << 26) -#define G_008C00_VS_PRIO(x) (((x) >> 26) & 0x3) -#define C_008C00_VS_PRIO 0xF3FFFFFF -#define S_008C00_GS_PRIO(x) (((x) & 0x3) << 28) -#define G_008C00_GS_PRIO(x) (((x) >> 28) & 0x3) -#define C_008C00_GS_PRIO 0xCFFFFFFF -#define S_008C00_ES_PRIO(x) (((x) & 0x3) << 30) -#define G_008C00_ES_PRIO(x) (((x) >> 30) & 0x3) -#define C_008C00_ES_PRIO 0x3FFFFFFF -#define R_008C04_SQ_GPR_RESOURCE_MGMT_1 0x008C04 -#define S_008C04_NUM_PS_GPRS(x) (((x) & 0xFF) << 0) -#define G_008C04_NUM_PS_GPRS(x) (((x) >> 0) & 0xFF) -#define C_008C04_NUM_PS_GPRS 0xFFFFFF00 -#define S_008C04_NUM_VS_GPRS(x) (((x) & 0xFF) << 16) -#define G_008C04_NUM_VS_GPRS(x) (((x) >> 16) & 0xFF) -#define C_008C04_NUM_VS_GPRS 0xFF00FFFF -#define S_008C04_NUM_CLAUSE_TEMP_GPRS(x) (((x) & 0xF) << 28) -#define G_008C04_NUM_CLAUSE_TEMP_GPRS(x) (((x) >> 28) & 0xF) -#define C_008C04_NUM_CLAUSE_TEMP_GPRS 0x0FFFFFFF -#define R_008C08_SQ_GPR_RESOURCE_MGMT_2 0x008C08 -#define S_008C08_NUM_GS_GPRS(x) (((x) & 0xFF) << 0) -#define G_008C08_NUM_GS_GPRS(x) (((x) >> 0) & 0xFF) -#define 
C_008C08_NUM_GS_GPRS 0xFFFFFF00 -#define S_008C08_NUM_ES_GPRS(x) (((x) & 0xFF) << 16) -#define G_008C08_NUM_ES_GPRS(x) (((x) >> 16) & 0xFF) -#define C_008C08_NUM_ES_GPRS 0xFF00FFFF -#define R_008C0C_SQ_THREAD_RESOURCE_MGMT 0x008C0C -#define S_008C0C_NUM_PS_THREADS(x) (((x) & 0xFF) << 0) -#define G_008C0C_NUM_PS_THREADS(x) (((x) >> 0) & 0xFF) -#define C_008C0C_NUM_PS_THREADS 0xFFFFFF00 -#define S_008C0C_NUM_VS_THREADS(x) (((x) & 0xFF) << 8) -#define G_008C0C_NUM_VS_THREADS(x) (((x) >> 8) & 0xFF) -#define C_008C0C_NUM_VS_THREADS 0xFFFF00FF -#define S_008C0C_NUM_GS_THREADS(x) (((x) & 0xFF) << 16) -#define G_008C0C_NUM_GS_THREADS(x) (((x) >> 16) & 0xFF) -#define C_008C0C_NUM_GS_THREADS 0xFF00FFFF -#define S_008C0C_NUM_ES_THREADS(x) (((x) & 0xFF) << 24) -#define G_008C0C_NUM_ES_THREADS(x) (((x) >> 24) & 0xFF) -#define C_008C0C_NUM_ES_THREADS 0x00FFFFFF -#define R_008C10_SQ_STACK_RESOURCE_MGMT_1 0x008C10 -#define S_008C10_NUM_PS_STACK_ENTRIES(x) (((x) & 0xFFF) << 0) -#define G_008C10_NUM_PS_STACK_ENTRIES(x) (((x) >> 0) & 0xFFF) -#define C_008C10_NUM_PS_STACK_ENTRIES 0xFFFFF000 -#define S_008C10_NUM_VS_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) -#define G_008C10_NUM_VS_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) -#define C_008C10_NUM_VS_STACK_ENTRIES 0xF000FFFF -#define R_008C14_SQ_STACK_RESOURCE_MGMT_2 0x008C14 -#define S_008C14_NUM_GS_STACK_ENTRIES(x) (((x) & 0xFFF) << 0) -#define G_008C14_NUM_GS_STACK_ENTRIES(x) (((x) >> 0) & 0xFFF) -#define C_008C14_NUM_GS_STACK_ENTRIES 0xFFFFF000 -#define S_008C14_NUM_ES_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) -#define G_008C14_NUM_ES_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) -#define C_008C14_NUM_ES_STACK_ENTRIES 0xF000FFFF -#define R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x008D8C -#define S_008D8C_RING0_OFFSET(x) (((x) & 0xFF) << 0) -#define G_008D8C_RING0_OFFSET(x) (((x) >> 0) & 0xFF) -#define C_008D8C_RING0_OFFSET 0xFFFFFF00 -#define S_008D8C_ISOLATE_ES_ENABLE(x) (((x) & 0x1) << 12) -#define G_008D8C_ISOLATE_ES_ENABLE(x) (((x) >> 12) & 0x1) 
-#define C_008D8C_ISOLATE_ES_ENABLE 0xFFFFEFFF -#define S_008D8C_ISOLATE_GS_ENABLE(x) (((x) & 0x1) << 13) -#define G_008D8C_ISOLATE_GS_ENABLE(x) (((x) >> 13) & 0x1) -#define C_008D8C_ISOLATE_GS_ENABLE 0xFFFFDFFF -#define S_008D8C_VS_PC_LIMIT_ENABLE(x) (((x) & 0x1) << 14) -#define G_008D8C_VS_PC_LIMIT_ENABLE(x) (((x) >> 14) & 0x1) -#define C_008D8C_VS_PC_LIMIT_ENABLE 0xFFFFBFFF -#define R_009508_TA_CNTL_AUX 0x009508 -#define S_009508_DISABLE_CUBE_WRAP(x) (((x) & 0x1) << 0) -#define G_009508_DISABLE_CUBE_WRAP(x) (((x) >> 0) & 0x1) -#define C_009508_DISABLE_CUBE_WRAP 0xFFFFFFFE -#define S_009508_SYNC_GRADIENT(x) (((x) & 0x1) << 24) -#define G_009508_SYNC_GRADIENT(x) (((x) >> 24) & 0x1) -#define C_009508_SYNC_GRADIENT 0xFEFFFFFF -#define S_009508_SYNC_WALKER(x) (((x) & 0x1) << 25) -#define G_009508_SYNC_WALKER(x) (((x) >> 25) & 0x1) -#define C_009508_SYNC_WALKER 0xFDFFFFFF -#define S_009508_SYNC_ALIGNER(x) (((x) & 0x1) << 26) -#define G_009508_SYNC_ALIGNER(x) (((x) >> 26) & 0x1) -#define C_009508_SYNC_ALIGNER 0xFBFFFFFF -#define S_009508_BILINEAR_PRECISION(x) (((x) & 0x1) << 31) -#define G_009508_BILINEAR_PRECISION(x) (((x) >> 31) & 0x1) -#define C_009508_BILINEAR_PRECISION 0x7FFFFFFF -#define R_009714_VC_ENHANCE 0x009714 -#define R_009830_DB_DEBUG 0x009830 -#define R_009838_DB_WATERMARKS 0x009838 -#define S_009838_DEPTH_FREE(x) (((x) & 0x1F) << 0) -#define G_009838_DEPTH_FREE(x) (((x) >> 0) & 0x1F) -#define C_009838_DEPTH_FREE 0xFFFFFFE0 -#define S_009838_DEPTH_FLUSH(x) (((x) & 0x3F) << 5) -#define G_009838_DEPTH_FLUSH(x) (((x) >> 5) & 0x3F) -#define C_009838_DEPTH_FLUSH 0xFFFFF81F -#define S_009838_FORCE_SUMMARIZE(x) (((x) & 0xF) << 11) -#define G_009838_FORCE_SUMMARIZE(x) (((x) >> 11) & 0xF) -#define C_009838_FORCE_SUMMARIZE 0xFFFF87FF -#define S_009838_DEPTH_PENDING_FREE(x) (((x) & 0x1F) << 15) -#define G_009838_DEPTH_PENDING_FREE(x) (((x) >> 15) & 0x1F) -#define C_009838_DEPTH_PENDING_FREE 0xFFF07FFF -#define S_009838_DEPTH_CACHELINE_FREE(x) (((x) & 0x1F) << 20) 
-#define G_009838_DEPTH_CACHELINE_FREE(x) (((x) >> 20) & 0x1F) -#define C_009838_DEPTH_CACHELINE_FREE 0xFE0FFFFF -#define S_009838_EARLY_Z_PANIC_DISABLE(x) (((x) & 0x1) << 25) -#define G_009838_EARLY_Z_PANIC_DISABLE(x) (((x) >> 25) & 0x1) -#define C_009838_EARLY_Z_PANIC_DISABLE 0xFDFFFFFF -#define S_009838_LATE_Z_PANIC_DISABLE(x) (((x) & 0x1) << 26) -#define G_009838_LATE_Z_PANIC_DISABLE(x) (((x) >> 26) & 0x1) -#define C_009838_LATE_Z_PANIC_DISABLE 0xFBFFFFFF -#define S_009838_RE_Z_PANIC_DISABLE(x) (((x) & 0x1) << 27) -#define G_009838_RE_Z_PANIC_DISABLE(x) (((x) >> 27) & 0x1) -#define C_009838_RE_Z_PANIC_DISABLE 0xF7FFFFFF -#define S_009838_DB_EXTRA_DEBUG(x) (((x) & 0xF) << 28) -#define G_009838_DB_EXTRA_DEBUG(x) (((x) >> 28) & 0xF) -#define C_009838_DB_EXTRA_DEBUG 0x0FFFFFFF -#define R_028030_PA_SC_SCREEN_SCISSOR_TL 0x028030 -#define S_028030_TL_X(x) (((x) & 0x7FFF) << 0) -#define G_028030_TL_X(x) (((x) >> 0) & 0x7FFF) -#define C_028030_TL_X 0xFFFF8000 -#define S_028030_TL_Y(x) (((x) & 0x7FFF) << 16) -#define G_028030_TL_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028030_TL_Y 0x8000FFFF -#define R_028034_PA_SC_SCREEN_SCISSOR_BR 0x028034 -#define S_028034_BR_X(x) (((x) & 0x7FFF) << 0) -#define G_028034_BR_X(x) (((x) >> 0) & 0x7FFF) -#define C_028034_BR_X 0xFFFF8000 -#define S_028034_BR_Y(x) (((x) & 0x7FFF) << 16) -#define G_028034_BR_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028034_BR_Y 0x8000FFFF -#define R_028200_PA_SC_WINDOW_OFFSET 0x028200 -#define S_028200_WINDOW_X_OFFSET(x) (((x) & 0x7FFF) << 0) -#define G_028200_WINDOW_X_OFFSET(x) (((x) >> 0) & 0x7FFF) -#define C_028200_WINDOW_X_OFFSET 0xFFFF8000 -#define S_028200_WINDOW_Y_OFFSET(x) (((x) & 0x7FFF) << 16) -#define G_028200_WINDOW_Y_OFFSET(x) (((x) >> 16) & 0x7FFF) -#define C_028200_WINDOW_Y_OFFSET 0x8000FFFF -#define R_028204_PA_SC_WINDOW_SCISSOR_TL 0x028204 -#define S_028204_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028204_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028204_TL_X 0xFFFFC000 -#define S_028204_TL_Y(x) 
(((x) & 0x3FFF) << 16) -#define G_028204_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028204_TL_Y 0xC000FFFF -#define S_028204_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028204_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028204_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028208_PA_SC_WINDOW_SCISSOR_BR 0x028208 -#define S_028208_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028208_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028208_BR_X 0xFFFFC000 -#define S_028208_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028208_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028208_BR_Y 0xC000FFFF -#define R_02820C_PA_SC_CLIPRECT_RULE 0x02820C -#define S_02820C_CLIP_RULE(x) (((x) & 0xFFFF) << 0) -#define G_02820C_CLIP_RULE(x) (((x) >> 0) & 0xFFFF) -#define C_02820C_CLIP_RULE 0xFFFF0000 -#define R_028210_PA_SC_CLIPRECT_0_TL 0x028210 -#define S_028210_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028210_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028210_TL_X 0xFFFFC000 -#define S_028210_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028210_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028210_TL_Y 0xC000FFFF -#define R_028214_PA_SC_CLIPRECT_0_BR 0x028214 -#define S_028214_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028214_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028214_BR_X 0xFFFFC000 -#define S_028214_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028214_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028214_BR_Y 0xC000FFFF -#define R_028218_PA_SC_CLIPRECT_1_TL 0x028218 -#define R_02821C_PA_SC_CLIPRECT_1_BR 0x02821C -#define R_028220_PA_SC_CLIPRECT_2_TL 0x028220 -#define R_028224_PA_SC_CLIPRECT_2_BR 0x028224 -#define R_028228_PA_SC_CLIPRECT_3_TL 0x028228 -#define R_02822C_PA_SC_CLIPRECT_3_BR 0x02822C -#define R_028230_PA_SC_EDGERULE 0x028230 -#define R_028240_PA_SC_GENERIC_SCISSOR_TL 0x028240 -#define S_028240_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028240_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028240_TL_X 0xFFFFC000 -#define S_028240_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028240_TL_Y(x) (((x) >> 16) 
& 0x3FFF) -#define C_028240_TL_Y 0xC000FFFF -#define S_028240_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028240_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028240_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028244_PA_SC_GENERIC_SCISSOR_BR 0x028244 -#define S_028244_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028244_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028244_BR_X 0xFFFFC000 -#define S_028244_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028244_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028244_BR_Y 0xC000FFFF -#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0 -#define S_0282D0_VPORT_ZMIN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0282D0_VPORT_ZMIN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0282D0_VPORT_ZMIN 0x00000000 -#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4 -#define S_0282D4_VPORT_ZMAX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0282D4_VPORT_ZMAX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0282D4_VPORT_ZMAX 0x00000000 -#define R_028350_SX_MISC 0x028350 -#define S_028350_MULTIPASS(x) (((x) & 0x1) << 0) -#define G_028350_MULTIPASS(x) (((x) >> 0) & 0x1) -#define C_028350_MULTIPASS 0xFFFFFFFE -#define R_028380_SQ_VTX_SEMANTIC_0 0x028380 -#define S_028380_SEMANTIC_ID(x) (((x) & 0xFF) << 0) -#define G_028380_SEMANTIC_ID(x) (((x) >> 0) & 0xFF) -#define C_028380_SEMANTIC_ID 0xFFFFFF00 -#define R_028384_SQ_VTX_SEMANTIC_1 0x028384 -#define R_028388_SQ_VTX_SEMANTIC_2 0x028388 -#define R_02838C_SQ_VTX_SEMANTIC_3 0x02838C -#define R_028390_SQ_VTX_SEMANTIC_4 0x028390 -#define R_028394_SQ_VTX_SEMANTIC_5 0x028394 -#define R_028398_SQ_VTX_SEMANTIC_6 0x028398 -#define R_02839C_SQ_VTX_SEMANTIC_7 0x02839C -#define R_0283A0_SQ_VTX_SEMANTIC_8 0x0283A0 -#define R_0283A4_SQ_VTX_SEMANTIC_9 0x0283A4 -#define R_0283A8_SQ_VTX_SEMANTIC_10 0x0283A8 -#define R_0283AC_SQ_VTX_SEMANTIC_11 0x0283AC -#define R_0283B0_SQ_VTX_SEMANTIC_12 0x0283B0 -#define R_0283B4_SQ_VTX_SEMANTIC_13 0x0283B4 -#define R_0283B8_SQ_VTX_SEMANTIC_14 0x0283B8 -#define R_0283BC_SQ_VTX_SEMANTIC_15 0x0283BC -#define 
R_0283C0_SQ_VTX_SEMANTIC_16 0x0283C0 -#define R_0283C4_SQ_VTX_SEMANTIC_17 0x0283C4 -#define R_0283C8_SQ_VTX_SEMANTIC_18 0x0283C8 -#define R_0283CC_SQ_VTX_SEMANTIC_19 0x0283CC -#define R_0283D0_SQ_VTX_SEMANTIC_20 0x0283D0 -#define R_0283D4_SQ_VTX_SEMANTIC_21 0x0283D4 -#define R_0283D8_SQ_VTX_SEMANTIC_22 0x0283D8 -#define R_0283DC_SQ_VTX_SEMANTIC_23 0x0283DC -#define R_0283E0_SQ_VTX_SEMANTIC_24 0x0283E0 -#define R_0283E4_SQ_VTX_SEMANTIC_25 0x0283E4 -#define R_0283E8_SQ_VTX_SEMANTIC_26 0x0283E8 -#define R_0283EC_SQ_VTX_SEMANTIC_27 0x0283EC -#define R_0283F0_SQ_VTX_SEMANTIC_28 0x0283F0 -#define R_0283F4_SQ_VTX_SEMANTIC_29 0x0283F4 -#define R_0283F8_SQ_VTX_SEMANTIC_30 0x0283F8 -#define R_0283FC_SQ_VTX_SEMANTIC_31 0x0283FC -#define R_028400_VGT_MAX_VTX_INDX 0x028400 -#define S_028400_MAX_INDX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028400_MAX_INDX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028400_MAX_INDX 0x00000000 -#define R_028404_VGT_MIN_VTX_INDX 0x028404 -#define S_028404_MIN_INDX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028404_MIN_INDX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028404_MIN_INDX 0x00000000 -#define R_028408_VGT_INDX_OFFSET 0x028408 -#define S_028408_INDX_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028408_INDX_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028408_INDX_OFFSET 0x00000000 -#define R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX 0x02840C -#define S_02840C_RESET_INDX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02840C_RESET_INDX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02840C_RESET_INDX 0x00000000 -#define R_028410_SX_ALPHA_TEST_CONTROL 0x028410 -#define S_028410_ALPHA_FUNC(x) (((x) & 0x7) << 0) -#define G_028410_ALPHA_FUNC(x) (((x) >> 0) & 0x7) -#define C_028410_ALPHA_FUNC 0xFFFFFFF8 -#define S_028410_ALPHA_TEST_ENABLE(x) (((x) & 0x1) << 3) -#define G_028410_ALPHA_TEST_ENABLE(x) (((x) >> 3) & 0x1) -#define C_028410_ALPHA_TEST_ENABLE 0xFFFFFFF7 -#define S_028410_ALPHA_TEST_BYPASS(x) (((x) & 0x1) << 8) -#define G_028410_ALPHA_TEST_BYPASS(x) (((x) >> 8) & 0x1) 
-#define C_028410_ALPHA_TEST_BYPASS 0xFFFFFEFF -#define R_028414_CB_BLEND_RED 0x028414 -#define S_028414_BLEND_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028414_BLEND_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028414_BLEND_RED 0x00000000 -#define R_028418_CB_BLEND_GREEN 0x028418 -#define S_028418_BLEND_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028418_BLEND_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028418_BLEND_GREEN 0x00000000 -#define R_02841C_CB_BLEND_BLUE 0x02841C -#define S_02841C_BLEND_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02841C_BLEND_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02841C_BLEND_BLUE 0x00000000 -#define R_028420_CB_BLEND_ALPHA 0x028420 -#define S_028420_BLEND_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028420_BLEND_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028420_BLEND_ALPHA 0x00000000 -#define R_028438_SX_ALPHA_REF 0x028438 -#define S_028438_ALPHA_REF(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028438_ALPHA_REF(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028438_ALPHA_REF 0x00000000 -#define R_0286C8_SPI_THREAD_GROUPING 0x0286C8 -#define S_0286C8_PS_GROUPING(x) (((x) & 0x1F) << 0) -#define G_0286C8_PS_GROUPING(x) (((x) >> 0) & 0x1F) -#define C_0286C8_PS_GROUPING 0xFFFFFFE0 -#define S_0286C8_VS_GROUPING(x) (((x) & 0x1F) << 8) -#define G_0286C8_VS_GROUPING(x) (((x) >> 8) & 0x1F) -#define C_0286C8_VS_GROUPING 0xFFFFE0FF -#define S_0286C8_GS_GROUPING(x) (((x) & 0x1F) << 16) -#define G_0286C8_GS_GROUPING(x) (((x) >> 16) & 0x1F) -#define C_0286C8_GS_GROUPING 0xFFE0FFFF -#define S_0286C8_ES_GROUPING(x) (((x) & 0x1F) << 24) -#define G_0286C8_ES_GROUPING(x) (((x) >> 24) & 0x1F) -#define C_0286C8_ES_GROUPING 0xE0FFFFFF -#define R_0286D8_SPI_INPUT_Z 0x0286D8 -#define S_0286D8_PROVIDE_Z_TO_SPI(x) (((x) & 0x1) << 0) -#define G_0286D8_PROVIDE_Z_TO_SPI(x) (((x) >> 0) & 0x1) -#define C_0286D8_PROVIDE_Z_TO_SPI 0xFFFFFFFE -#define R_0286DC_SPI_FOG_CNTL 0x0286DC -#define S_0286DC_PASS_FOG_THROUGH_PS(x) (((x) & 0x1) << 0) -#define 
G_0286DC_PASS_FOG_THROUGH_PS(x) (((x) >> 0) & 0x1) -#define C_0286DC_PASS_FOG_THROUGH_PS 0xFFFFFFFE -#define S_0286DC_PIXEL_FOG_FUNC(x) (((x) & 0x3) << 1) -#define G_0286DC_PIXEL_FOG_FUNC(x) (((x) >> 1) & 0x3) -#define C_0286DC_PIXEL_FOG_FUNC 0xFFFFFFF9 -#define S_0286DC_PIXEL_FOG_SRC_SEL(x) (((x) & 0x1) << 3) -#define G_0286DC_PIXEL_FOG_SRC_SEL(x) (((x) >> 3) & 0x1) -#define C_0286DC_PIXEL_FOG_SRC_SEL 0xFFFFFFF7 -#define S_0286DC_VS_FOG_CLAMP_DISABLE(x) (((x) & 0x1) << 4) -#define G_0286DC_VS_FOG_CLAMP_DISABLE(x) (((x) >> 4) & 0x1) -#define C_0286DC_VS_FOG_CLAMP_DISABLE 0xFFFFFFEF -#define R_0286E0_SPI_FOG_FUNC_SCALE 0x0286E0 -#define S_0286E0_VALUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0286E0_VALUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0286E0_VALUE 0x00000000 -#define R_0286E4_SPI_FOG_FUNC_BIAS 0x0286E4 -#define S_0286E4_VALUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0286E4_VALUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0286E4_VALUE 0x00000000 -#define R_0287A0_CB_SHADER_CONTROL 0x0287A0 -#define S_0287A0_RT0_ENABLE(x) (((x) & 0x1) << 0) -#define G_0287A0_RT0_ENABLE(x) (((x) >> 0) & 0x1) -#define C_0287A0_RT0_ENABLE 0xFFFFFFFE -#define S_0287A0_RT1_ENABLE(x) (((x) & 0x1) << 1) -#define G_0287A0_RT1_ENABLE(x) (((x) >> 1) & 0x1) -#define C_0287A0_RT1_ENABLE 0xFFFFFFFD -#define S_0287A0_RT2_ENABLE(x) (((x) & 0x1) << 2) -#define G_0287A0_RT2_ENABLE(x) (((x) >> 2) & 0x1) -#define C_0287A0_RT2_ENABLE 0xFFFFFFFB -#define S_0287A0_RT3_ENABLE(x) (((x) & 0x1) << 3) -#define G_0287A0_RT3_ENABLE(x) (((x) >> 3) & 0x1) -#define C_0287A0_RT3_ENABLE 0xFFFFFFF7 -#define S_0287A0_RT4_ENABLE(x) (((x) & 0x1) << 4) -#define G_0287A0_RT4_ENABLE(x) (((x) >> 4) & 0x1) -#define C_0287A0_RT4_ENABLE 0xFFFFFFEF -#define S_0287A0_RT5_ENABLE(x) (((x) & 0x1) << 5) -#define G_0287A0_RT5_ENABLE(x) (((x) >> 5) & 0x1) -#define C_0287A0_RT5_ENABLE 0xFFFFFFDF -#define S_0287A0_RT6_ENABLE(x) (((x) & 0x1) << 6) -#define G_0287A0_RT6_ENABLE(x) (((x) >> 6) & 0x1) -#define C_0287A0_RT6_ENABLE 
0xFFFFFFBF -#define S_0287A0_RT7_ENABLE(x) (((x) & 0x1) << 7) -#define G_0287A0_RT7_ENABLE(x) (((x) >> 7) & 0x1) -#define C_0287A0_RT7_ENABLE 0xFFFFFF7F -#define R_028894_SQ_PGM_START_FS 0x028894 -#define S_028894_PGM_START(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028894_PGM_START(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028894_PGM_START 0x00000000 -#define R_0288A4_SQ_PGM_RESOURCES_FS 0x0288A4 -#define S_0288A4_NUM_GPRS(x) (((x) & 0xFF) << 0) -#define G_0288A4_NUM_GPRS(x) (((x) >> 0) & 0xFF) -#define C_0288A4_NUM_GPRS 0xFFFFFF00 -#define S_0288A4_STACK_SIZE(x) (((x) & 0xFF) << 8) -#define G_0288A4_STACK_SIZE(x) (((x) >> 8) & 0xFF) -#define C_0288A4_STACK_SIZE 0xFFFF00FF -#define S_0288A4_DX10_CLAMP(x) (((x) & 0x1) << 21) -#define G_0288A4_DX10_CLAMP(x) (((x) >> 21) & 0x1) -#define C_0288A4_DX10_CLAMP 0xFFDFFFFF -#define R_0288A8_SQ_ESGS_RING_ITEMSIZE 0x0288A8 -#define S_0288A8_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288A8_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288A8_ITEMSIZE 0xFFFF8000 -#define R_0288AC_SQ_GSVS_RING_ITEMSIZE 0x0288AC -#define S_0288AC_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288AC_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288AC_ITEMSIZE 0xFFFF8000 -#define R_0288B0_SQ_ESTMP_RING_ITEMSIZE 0x0288B0 -#define S_0288B0_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288B0_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288B0_ITEMSIZE 0xFFFF8000 -#define R_0288B4_SQ_GSTMP_RING_ITEMSIZE 0x0288B4 -#define S_0288B4_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288B4_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288B4_ITEMSIZE 0xFFFF8000 -#define R_0288B8_SQ_VSTMP_RING_ITEMSIZE 0x0288B8 -#define S_0288B8_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288B8_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288B8_ITEMSIZE 0xFFFF8000 -#define R_0288BC_SQ_PSTMP_RING_ITEMSIZE 0x0288BC -#define S_0288BC_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288BC_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288BC_ITEMSIZE 0xFFFF8000 -#define 
R_0288C0_SQ_FBUF_RING_ITEMSIZE 0x0288C0 -#define S_0288C0_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288C0_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288C0_ITEMSIZE 0xFFFF8000 -#define R_0288C4_SQ_REDUC_RING_ITEMSIZE 0x0288C4 -#define S_0288C4_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288C4_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288C4_ITEMSIZE 0xFFFF8000 -#define R_0288C8_SQ_GS_VERT_ITEMSIZE 0x0288C8 -#define S_0288C8_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288C8_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288C8_ITEMSIZE 0xFFFF8000 -#define R_0288DC_SQ_PGM_CF_OFFSET_FS 0x0288DC -#define S_0288DC_PGM_CF_OFFSET(x) (((x) & 0xFFFFF) << 0) -#define G_0288DC_PGM_CF_OFFSET(x) (((x) >> 0) & 0xFFFFF) -#define C_0288DC_PGM_CF_OFFSET 0xFFF00000 -#define R_028A10_VGT_OUTPUT_PATH_CNTL 0x028A10 -#define S_028A10_PATH_SELECT(x) (((x) & 0x3) << 0) -#define G_028A10_PATH_SELECT(x) (((x) >> 0) & 0x3) -#define C_028A10_PATH_SELECT 0xFFFFFFFC -#define R_028A14_VGT_HOS_CNTL 0x028A14 -#define S_028A14_TESS_MODE(x) (((x) & 0x3) << 0) -#define G_028A14_TESS_MODE(x) (((x) >> 0) & 0x3) -#define C_028A14_TESS_MODE 0xFFFFFFFC -#define R_028A18_VGT_HOS_MAX_TESS_LEVEL 0x028A18 -#define S_028A18_MAX_TESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028A18_MAX_TESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028A18_MAX_TESS 0x00000000 -#define R_028A1C_VGT_HOS_MIN_TESS_LEVEL 0x028A1C -#define S_028A1C_MIN_TESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028A1C_MIN_TESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028A1C_MIN_TESS 0x00000000 -#define R_028A20_VGT_HOS_REUSE_DEPTH 0x028A20 -#define S_028A20_REUSE_DEPTH(x) (((x) & 0xFF) << 0) -#define G_028A20_REUSE_DEPTH(x) (((x) >> 0) & 0xFF) -#define C_028A20_REUSE_DEPTH 0xFFFFFF00 -#define R_028A24_VGT_GROUP_PRIM_TYPE 0x028A24 -#define S_028A24_PRIM_TYPE(x) (((x) & 0x1F) << 0) -#define G_028A24_PRIM_TYPE(x) (((x) >> 0) & 0x1F) -#define C_028A24_PRIM_TYPE 0xFFFFFFE0 -#define S_028A24_RETAIN_ORDER(x) (((x) & 0x1) << 14) -#define 
G_028A24_RETAIN_ORDER(x) (((x) >> 14) & 0x1) -#define C_028A24_RETAIN_ORDER 0xFFFFBFFF -#define S_028A24_RETAIN_QUADS(x) (((x) & 0x1) << 15) -#define G_028A24_RETAIN_QUADS(x) (((x) >> 15) & 0x1) -#define C_028A24_RETAIN_QUADS 0xFFFF7FFF -#define S_028A24_PRIM_ORDER(x) (((x) & 0x7) << 16) -#define G_028A24_PRIM_ORDER(x) (((x) >> 16) & 0x7) -#define C_028A24_PRIM_ORDER 0xFFF8FFFF -#define R_028A28_VGT_GROUP_FIRST_DECR 0x028A28 -#define S_028A28_FIRST_DECR(x) (((x) & 0xF) << 0) -#define G_028A28_FIRST_DECR(x) (((x) >> 0) & 0xF) -#define C_028A28_FIRST_DECR 0xFFFFFFF0 -#define R_028A2C_VGT_GROUP_DECR 0x028A2C -#define S_028A2C_DECR(x) (((x) & 0xF) << 0) -#define G_028A2C_DECR(x) (((x) >> 0) & 0xF) -#define C_028A2C_DECR 0xFFFFFFF0 -#define R_028A30_VGT_GROUP_VECT_0_CNTL 0x028A30 -#define S_028A30_COMP_X_EN(x) (((x) & 0x1) << 0) -#define G_028A30_COMP_X_EN(x) (((x) >> 0) & 0x1) -#define C_028A30_COMP_X_EN 0xFFFFFFFE -#define S_028A30_COMP_Y_EN(x) (((x) & 0x1) << 1) -#define G_028A30_COMP_Y_EN(x) (((x) >> 1) & 0x1) -#define C_028A30_COMP_Y_EN 0xFFFFFFFD -#define S_028A30_COMP_Z_EN(x) (((x) & 0x1) << 2) -#define G_028A30_COMP_Z_EN(x) (((x) >> 2) & 0x1) -#define C_028A30_COMP_Z_EN 0xFFFFFFFB -#define S_028A30_COMP_W_EN(x) (((x) & 0x1) << 3) -#define G_028A30_COMP_W_EN(x) (((x) >> 3) & 0x1) -#define C_028A30_COMP_W_EN 0xFFFFFFF7 -#define S_028A30_STRIDE(x) (((x) & 0xFF) << 8) -#define G_028A30_STRIDE(x) (((x) >> 8) & 0xFF) -#define C_028A30_STRIDE 0xFFFF00FF -#define S_028A30_SHIFT(x) (((x) & 0xFF) << 16) -#define G_028A30_SHIFT(x) (((x) >> 16) & 0xFF) -#define C_028A30_SHIFT 0xFF00FFFF -#define R_028A34_VGT_GROUP_VECT_1_CNTL 0x028A34 -#define S_028A34_COMP_X_EN(x) (((x) & 0x1) << 0) -#define G_028A34_COMP_X_EN(x) (((x) >> 0) & 0x1) -#define C_028A34_COMP_X_EN 0xFFFFFFFE -#define S_028A34_COMP_Y_EN(x) (((x) & 0x1) << 1) -#define G_028A34_COMP_Y_EN(x) (((x) >> 1) & 0x1) -#define C_028A34_COMP_Y_EN 0xFFFFFFFD -#define S_028A34_COMP_Z_EN(x) (((x) & 0x1) << 2) -#define 
G_028A34_COMP_Z_EN(x) (((x) >> 2) & 0x1) -#define C_028A34_COMP_Z_EN 0xFFFFFFFB -#define S_028A34_COMP_W_EN(x) (((x) & 0x1) << 3) -#define G_028A34_COMP_W_EN(x) (((x) >> 3) & 0x1) -#define C_028A34_COMP_W_EN 0xFFFFFFF7 -#define S_028A34_STRIDE(x) (((x) & 0xFF) << 8) -#define G_028A34_STRIDE(x) (((x) >> 8) & 0xFF) -#define C_028A34_STRIDE 0xFFFF00FF -#define S_028A34_SHIFT(x) (((x) & 0xFF) << 16) -#define G_028A34_SHIFT(x) (((x) >> 16) & 0xFF) -#define C_028A34_SHIFT 0xFF00FFFF -#define R_028A38_VGT_GROUP_VECT_0_FMT_CNTL 0x028A38 -#define S_028A38_X_CONV(x) (((x) & 0xF) << 0) -#define G_028A38_X_CONV(x) (((x) >> 0) & 0xF) -#define C_028A38_X_CONV 0xFFFFFFF0 -#define S_028A38_X_OFFSET(x) (((x) & 0xF) << 4) -#define G_028A38_X_OFFSET(x) (((x) >> 4) & 0xF) -#define C_028A38_X_OFFSET 0xFFFFFF0F -#define S_028A38_Y_CONV(x) (((x) & 0xF) << 8) -#define G_028A38_Y_CONV(x) (((x) >> 8) & 0xF) -#define C_028A38_Y_CONV 0xFFFFF0FF -#define S_028A38_Y_OFFSET(x) (((x) & 0xF) << 12) -#define G_028A38_Y_OFFSET(x) (((x) >> 12) & 0xF) -#define C_028A38_Y_OFFSET 0xFFFF0FFF -#define S_028A38_Z_CONV(x) (((x) & 0xF) << 16) -#define G_028A38_Z_CONV(x) (((x) >> 16) & 0xF) -#define C_028A38_Z_CONV 0xFFF0FFFF -#define S_028A38_Z_OFFSET(x) (((x) & 0xF) << 20) -#define G_028A38_Z_OFFSET(x) (((x) >> 20) & 0xF) -#define C_028A38_Z_OFFSET 0xFF0FFFFF -#define S_028A38_W_CONV(x) (((x) & 0xF) << 24) -#define G_028A38_W_CONV(x) (((x) >> 24) & 0xF) -#define C_028A38_W_CONV 0xF0FFFFFF -#define S_028A38_W_OFFSET(x) (((x) & 0xF) << 28) -#define G_028A38_W_OFFSET(x) (((x) >> 28) & 0xF) -#define C_028A38_W_OFFSET 0x0FFFFFFF -#define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL 0x028A3C -#define S_028A3C_X_CONV(x) (((x) & 0xF) << 0) -#define G_028A3C_X_CONV(x) (((x) >> 0) & 0xF) -#define C_028A3C_X_CONV 0xFFFFFFF0 -#define S_028A3C_X_OFFSET(x) (((x) & 0xF) << 4) -#define G_028A3C_X_OFFSET(x) (((x) >> 4) & 0xF) -#define C_028A3C_X_OFFSET 0xFFFFFF0F -#define S_028A3C_Y_CONV(x) (((x) & 0xF) << 8) -#define 
G_028A3C_Y_CONV(x) (((x) >> 8) & 0xF) -#define C_028A3C_Y_CONV 0xFFFFF0FF -#define S_028A3C_Y_OFFSET(x) (((x) & 0xF) << 12) -#define G_028A3C_Y_OFFSET(x) (((x) >> 12) & 0xF) -#define C_028A3C_Y_OFFSET 0xFFFF0FFF -#define S_028A3C_Z_CONV(x) (((x) & 0xF) << 16) -#define G_028A3C_Z_CONV(x) (((x) >> 16) & 0xF) -#define C_028A3C_Z_CONV 0xFFF0FFFF -#define S_028A3C_Z_OFFSET(x) (((x) & 0xF) << 20) -#define G_028A3C_Z_OFFSET(x) (((x) >> 20) & 0xF) -#define C_028A3C_Z_OFFSET 0xFF0FFFFF -#define S_028A3C_W_CONV(x) (((x) & 0xF) << 24) -#define G_028A3C_W_CONV(x) (((x) >> 24) & 0xF) -#define C_028A3C_W_CONV 0xF0FFFFFF -#define S_028A3C_W_OFFSET(x) (((x) & 0xF) << 28) -#define G_028A3C_W_OFFSET(x) (((x) >> 28) & 0xF) -#define C_028A3C_W_OFFSET 0x0FFFFFFF -#define R_028A40_VGT_GS_MODE 0x028A40 -#define S_028A40_MODE(x) (((x) & 0x3) << 0) -#define G_028A40_MODE(x) (((x) >> 0) & 0x3) -#define C_028A40_MODE 0xFFFFFFFC -#define S_028A40_ES_PASSTHRU(x) (((x) & 0x1) << 2) -#define G_028A40_ES_PASSTHRU(x) (((x) >> 2) & 0x1) -#define C_028A40_ES_PASSTHRU 0xFFFFFFFB -#define S_028A40_CUT_MODE(x) (((x) & 0x3) << 3) -#define G_028A40_CUT_MODE(x) (((x) >> 3) & 0x3) -#define C_028A40_CUT_MODE 0xFFFFFFE7 -#define R_028A4C_PA_SC_MODE_CNTL 0x028A4C -#define S_028A4C_MSAA_ENABLE(x) (((x) & 0x1) << 0) -#define G_028A4C_MSAA_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028A4C_MSAA_ENABLE 0xFFFFFFFE -#define S_028A4C_CLIPRECT_ENABLE(x) (((x) & 0x1) << 1) -#define G_028A4C_CLIPRECT_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028A4C_CLIPRECT_ENABLE 0xFFFFFFFD -#define S_028A4C_LINE_STIPPLE_ENABLE(x) (((x) & 0x1) << 2) -#define G_028A4C_LINE_STIPPLE_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028A4C_LINE_STIPPLE_ENABLE 0xFFFFFFFB -#define S_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB(x) (((x) & 0x1) << 3) -#define G_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB(x) (((x) >> 3) & 0x1) -#define C_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB 0xFFFFFFF7 -#define S_028A4C_WALK_ORDER_ENABLE(x) (((x) & 0x1) << 4) -#define 
G_028A4C_WALK_ORDER_ENABLE(x) (((x) >> 4) & 0x1) -#define C_028A4C_WALK_ORDER_ENABLE 0xFFFFFFEF -#define S_028A4C_HALVE_DETAIL_SAMPLE_PERF(x) (((x) & 0x1) << 5) -#define G_028A4C_HALVE_DETAIL_SAMPLE_PERF(x) (((x) >> 5) & 0x1) -#define C_028A4C_HALVE_DETAIL_SAMPLE_PERF 0xFFFFFFDF -#define S_028A4C_WALK_SIZE(x) (((x) & 0x1) << 6) -#define G_028A4C_WALK_SIZE(x) (((x) >> 6) & 0x1) -#define C_028A4C_WALK_SIZE 0xFFFFFFBF -#define S_028A4C_WALK_ALIGNMENT(x) (((x) & 0x1) << 7) -#define G_028A4C_WALK_ALIGNMENT(x) (((x) >> 7) & 0x1) -#define C_028A4C_WALK_ALIGNMENT 0xFFFFFF7F -#define S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(x) (((x) & 0x1) << 8) -#define G_028A4C_WALK_ALIGN8_PRIM_FITS_ST(x) (((x) >> 8) & 0x1) -#define C_028A4C_WALK_ALIGN8_PRIM_FITS_ST 0xFFFFFEFF -#define S_028A4C_TILE_COVER_NO_SCISSOR(x) (((x) & 0x1) << 9) -#define G_028A4C_TILE_COVER_NO_SCISSOR(x) (((x) >> 9) & 0x1) -#define C_028A4C_TILE_COVER_NO_SCISSOR 0xFFFFFDFF -#define S_028A4C_KILL_PIX_POST_HI_Z(x) (((x) & 0x1) << 10) -#define G_028A4C_KILL_PIX_POST_HI_Z(x) (((x) >> 10) & 0x1) -#define C_028A4C_KILL_PIX_POST_HI_Z 0xFFFFFBFF -#define S_028A4C_KILL_PIX_POST_DETAIL_MASK(x) (((x) & 0x1) << 11) -#define G_028A4C_KILL_PIX_POST_DETAIL_MASK(x) (((x) >> 11) & 0x1) -#define C_028A4C_KILL_PIX_POST_DETAIL_MASK 0xFFFFF7FF -#define S_028A4C_MULTI_CHIP_SUPERTILE_ENABLE(x) (((x) & 0x1) << 12) -#define G_028A4C_MULTI_CHIP_SUPERTILE_ENABLE(x) (((x) >> 12) & 0x1) -#define C_028A4C_MULTI_CHIP_SUPERTILE_ENABLE 0xFFFFEFFF -#define S_028A4C_TILE_COVER_DISABLE(x) (((x) & 0x1) << 13) -#define G_028A4C_TILE_COVER_DISABLE(x) (((x) >> 13) & 0x1) -#define C_028A4C_TILE_COVER_DISABLE 0xFFFFDFFF -#define S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) & 0x1) << 14) -#define G_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) >> 14) & 0x1) -#define C_028A4C_FORCE_EOV_CNTDWN_ENABLE 0xFFFFBFFF -#define S_028A4C_FORCE_EOV_TILE_ENABLE(x) (((x) & 0x1) << 15) -#define G_028A4C_FORCE_EOV_TILE_ENABLE(x) (((x) >> 15) & 0x1) -#define 
C_028A4C_FORCE_EOV_TILE_ENABLE 0xFFFF7FFF -#define S_028A4C_FORCE_EOV_REZ_ENABLE(x) (((x) & 0x1) << 16) -#define G_028A4C_FORCE_EOV_REZ_ENABLE(x) (((x) >> 16) & 0x1) -#define C_028A4C_FORCE_EOV_REZ_ENABLE 0xFFFEFFFF -#define S_028A4C_PS_ITER_SAMPLE(x) (((x) & 0x1) << 17) -#define G_028A4C_PS_ITER_SAMPLE(x) (((x) >> 17) & 0x1) -#define C_028A4C_PS_ITER_SAMPLE 0xFFFDFFFF -#define R_028A84_VGT_PRIMITIVEID_EN 0x028A84 -#define S_028A84_PRIMITIVEID_EN(x) (((x) & 0x1) << 0) -#define G_028A84_PRIMITIVEID_EN(x) (((x) >> 0) & 0x1) -#define C_028A84_PRIMITIVEID_EN 0xFFFFFFFE -#define R_028A94_VGT_MULTI_PRIM_IB_RESET_EN 0x028A94 -#define S_028A94_RESET_EN(x) (((x) & 0x1) << 0) -#define G_028A94_RESET_EN(x) (((x) >> 0) & 0x1) -#define C_028A94_RESET_EN 0xFFFFFFFE -#define R_028AA0_VGT_INSTANCE_STEP_RATE_0 0x028AA0 -#define S_028AA0_STEP_RATE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028AA0_STEP_RATE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028AA0_STEP_RATE 0x00000000 -#define R_028AA4_VGT_INSTANCE_STEP_RATE_1 0x028AA4 -#define S_028AA4_STEP_RATE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028AA4_STEP_RATE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028AA4_STEP_RATE 0x00000000 -#define R_028AB0_VGT_STRMOUT_EN 0x028AB0 -#define S_028AB0_STREAMOUT(x) (((x) & 0x1) << 0) -#define G_028AB0_STREAMOUT(x) (((x) >> 0) & 0x1) -#define C_028AB0_STREAMOUT 0xFFFFFFFE -#define R_028AB4_VGT_REUSE_OFF 0x028AB4 -#define S_028AB4_REUSE_OFF(x) (((x) & 0x1) << 0) -#define G_028AB4_REUSE_OFF(x) (((x) >> 0) & 0x1) -#define C_028AB4_REUSE_OFF 0xFFFFFFFE -#define R_028AB8_VGT_VTX_CNT_EN 0x028AB8 -#define S_028AB8_VTX_CNT_EN(x) (((x) & 0x1) << 0) -#define G_028AB8_VTX_CNT_EN(x) (((x) >> 0) & 0x1) -#define C_028AB8_VTX_CNT_EN 0xFFFFFFFE -#define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20 -#define S_028B20_BUFFER_0_EN(x) (((x) & 0x1) << 0) -#define G_028B20_BUFFER_0_EN(x) (((x) >> 0) & 0x1) -#define C_028B20_BUFFER_0_EN 0xFFFFFFFE -#define S_028B20_BUFFER_1_EN(x) (((x) & 0x1) << 1) -#define G_028B20_BUFFER_1_EN(x) 
(((x) >> 1) & 0x1) -#define C_028B20_BUFFER_1_EN 0xFFFFFFFD -#define S_028B20_BUFFER_2_EN(x) (((x) & 0x1) << 2) -#define G_028B20_BUFFER_2_EN(x) (((x) >> 2) & 0x1) -#define C_028B20_BUFFER_2_EN 0xFFFFFFFB -#define S_028B20_BUFFER_3_EN(x) (((x) & 0x1) << 3) -#define G_028B20_BUFFER_3_EN(x) (((x) >> 3) & 0x1) -#define C_028B20_BUFFER_3_EN 0xFFFFFFF7 -#define R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 0x028C20 -#define S_028C20_S4_X(x) (((x) & 0xF) << 0) -#define G_028C20_S4_X(x) (((x) >> 0) & 0xF) -#define C_028C20_S4_X 0xFFFFFFF0 -#define S_028C20_S4_Y(x) (((x) & 0xF) << 4) -#define G_028C20_S4_Y(x) (((x) >> 4) & 0xF) -#define C_028C20_S4_Y 0xFFFFFF0F -#define S_028C20_S5_X(x) (((x) & 0xF) << 8) -#define G_028C20_S5_X(x) (((x) >> 8) & 0xF) -#define C_028C20_S5_X 0xFFFFF0FF -#define S_028C20_S5_Y(x) (((x) & 0xF) << 12) -#define G_028C20_S5_Y(x) (((x) >> 12) & 0xF) -#define C_028C20_S5_Y 0xFFFF0FFF -#define S_028C20_S6_X(x) (((x) & 0xF) << 16) -#define G_028C20_S6_X(x) (((x) >> 16) & 0xF) -#define C_028C20_S6_X 0xFFF0FFFF -#define S_028C20_S6_Y(x) (((x) & 0xF) << 20) -#define G_028C20_S6_Y(x) (((x) >> 20) & 0xF) -#define C_028C20_S6_Y 0xFF0FFFFF -#define S_028C20_S7_X(x) (((x) & 0xF) << 24) -#define G_028C20_S7_X(x) (((x) >> 24) & 0xF) -#define C_028C20_S7_X 0xF0FFFFFF -#define S_028C20_S7_Y(x) (((x) & 0xF) << 28) -#define G_028C20_S7_Y(x) (((x) >> 28) & 0xF) -#define C_028C20_S7_Y 0x0FFFFFFF -#define R_028C30_CB_CLRCMP_CONTROL 0x028C30 -#define S_028C30_CLRCMP_FCN_SRC(x) (((x) & 0x7) << 0) -#define G_028C30_CLRCMP_FCN_SRC(x) (((x) >> 0) & 0x7) -#define C_028C30_CLRCMP_FCN_SRC 0xFFFFFFF8 -#define S_028C30_CLRCMP_FCN_DST(x) (((x) & 0x7) << 8) -#define G_028C30_CLRCMP_FCN_DST(x) (((x) >> 8) & 0x7) -#define C_028C30_CLRCMP_FCN_DST 0xFFFFF8FF -#define S_028C30_CLRCMP_FCN_SEL(x) (((x) & 0x3) << 24) -#define G_028C30_CLRCMP_FCN_SEL(x) (((x) >> 24) & 0x3) -#define C_028C30_CLRCMP_FCN_SEL 0xFCFFFFFF -#define R_028C34_CB_CLRCMP_SRC 0x028C34 -#define S_028C34_CLRCMP_SRC(x) (((x) 
& 0xFFFFFFFF) << 0) -#define G_028C34_CLRCMP_SRC(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028C34_CLRCMP_SRC 0x00000000 -#define R_028C38_CB_CLRCMP_DST 0x028C38 -#define S_028C38_CLRCMP_DST(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028C38_CLRCMP_DST(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028C38_CLRCMP_DST 0x00000000 -#define R_028C3C_CB_CLRCMP_MSK 0x028C3C -#define S_028C3C_CLRCMP_MSK(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028C3C_CLRCMP_MSK(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028C3C_CLRCMP_MSK 0x00000000 -#define R_0085F0_CP_COHER_CNTL 0x0085F0 -#define S_0085F0_DEST_BASE_0_ENA(x) (((x) & 0x1) << 0) -#define G_0085F0_DEST_BASE_0_ENA(x) (((x) >> 0) & 0x1) -#define C_0085F0_DEST_BASE_0_ENA 0xFFFFFFFE -#define S_0085F0_DEST_BASE_1_ENA(x) (((x) & 0x1) << 1) -#define G_0085F0_DEST_BASE_1_ENA(x) (((x) >> 1) & 0x1) -#define C_0085F0_DEST_BASE_1_ENA 0xFFFFFFFD -#define S_0085F0_SO0_DEST_BASE_ENA(x) (((x) & 0x1) << 2) -#define G_0085F0_SO0_DEST_BASE_ENA(x) (((x) >> 2) & 0x1) -#define C_0085F0_SO0_DEST_BASE_ENA 0xFFFFFFFB -#define S_0085F0_SO1_DEST_BASE_ENA(x) (((x) & 0x1) << 3) -#define G_0085F0_SO1_DEST_BASE_ENA(x) (((x) >> 3) & 0x1) -#define C_0085F0_SO1_DEST_BASE_ENA 0xFFFFFFF7 -#define S_0085F0_SO2_DEST_BASE_ENA(x) (((x) & 0x1) << 4) -#define G_0085F0_SO2_DEST_BASE_ENA(x) (((x) >> 4) & 0x1) -#define C_0085F0_SO2_DEST_BASE_ENA 0xFFFFFFEF -#define S_0085F0_SO3_DEST_BASE_ENA(x) (((x) & 0x1) << 5) -#define G_0085F0_SO3_DEST_BASE_ENA(x) (((x) >> 5) & 0x1) -#define C_0085F0_SO3_DEST_BASE_ENA 0xFFFFFFDF -#define S_0085F0_CB0_DEST_BASE_ENA(x) (((x) & 0x1) << 6) -#define G_0085F0_CB0_DEST_BASE_ENA(x) (((x) >> 6) & 0x1) -#define C_0085F0_CB0_DEST_BASE_ENA 0xFFFFFFBF -#define S_0085F0_CB1_DEST_BASE_ENA(x) (((x) & 0x1) << 7) -#define G_0085F0_CB1_DEST_BASE_ENA(x) (((x) >> 7) & 0x1) -#define C_0085F0_CB1_DEST_BASE_ENA 0xFFFFFF7F -#define S_0085F0_CB2_DEST_BASE_ENA(x) (((x) & 0x1) << 8) -#define G_0085F0_CB2_DEST_BASE_ENA(x) (((x) >> 8) & 0x1) -#define C_0085F0_CB2_DEST_BASE_ENA 
0xFFFFFEFF -#define S_0085F0_CB3_DEST_BASE_ENA(x) (((x) & 0x1) << 9) -#define G_0085F0_CB3_DEST_BASE_ENA(x) (((x) >> 9) & 0x1) -#define C_0085F0_CB3_DEST_BASE_ENA 0xFFFFFDFF -#define S_0085F0_CB4_DEST_BASE_ENA(x) (((x) & 0x1) << 10) -#define G_0085F0_CB4_DEST_BASE_ENA(x) (((x) >> 10) & 0x1) -#define C_0085F0_CB4_DEST_BASE_ENA 0xFFFFFBFF -#define S_0085F0_CB5_DEST_BASE_ENA(x) (((x) & 0x1) << 11) -#define G_0085F0_CB5_DEST_BASE_ENA(x) (((x) >> 11) & 0x1) -#define C_0085F0_CB5_DEST_BASE_ENA 0xFFFFF7FF -#define S_0085F0_CB6_DEST_BASE_ENA(x) (((x) & 0x1) << 12) -#define G_0085F0_CB6_DEST_BASE_ENA(x) (((x) >> 12) & 0x1) -#define C_0085F0_CB6_DEST_BASE_ENA 0xFFFFEFFF -#define S_0085F0_CB7_DEST_BASE_ENA(x) (((x) & 0x1) << 13) -#define G_0085F0_CB7_DEST_BASE_ENA(x) (((x) >> 13) & 0x1) -#define C_0085F0_CB7_DEST_BASE_ENA 0xFFFFDFFF -#define S_0085F0_DB_DEST_BASE_ENA(x) (((x) & 0x1) << 14) -#define G_0085F0_DB_DEST_BASE_ENA(x) (((x) >> 14) & 0x1) -#define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF -#define S_0085F0_CR_DEST_BASE_ENA(x) (((x) & 0x1) << 15) -#define G_0085F0_CR_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) -#define C_0085F0_CR_DEST_BASE_ENA 0xFFFF7FFF -#define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23) -#define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1) -#define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF -#define S_0085F0_VC_ACTION_ENA(x) (((x) & 0x1) << 24) -#define G_0085F0_VC_ACTION_ENA(x) (((x) >> 24) & 0x1) -#define C_0085F0_VC_ACTION_ENA 0xFEFFFFFF -#define S_0085F0_CB_ACTION_ENA(x) (((x) & 0x1) << 25) -#define G_0085F0_CB_ACTION_ENA(x) (((x) >> 25) & 0x1) -#define C_0085F0_CB_ACTION_ENA 0xFDFFFFFF -#define S_0085F0_DB_ACTION_ENA(x) (((x) & 0x1) << 26) -#define G_0085F0_DB_ACTION_ENA(x) (((x) >> 26) & 0x1) -#define C_0085F0_DB_ACTION_ENA 0xFBFFFFFF -#define S_0085F0_SH_ACTION_ENA(x) (((x) & 0x1) << 27) -#define G_0085F0_SH_ACTION_ENA(x) (((x) >> 27) & 0x1) -#define C_0085F0_SH_ACTION_ENA 0xF7FFFFFF -#define S_0085F0_SMX_ACTION_ENA(x) (((x) & 0x1) << 28) -#define 
G_0085F0_SMX_ACTION_ENA(x) (((x) >> 28) & 0x1) -#define C_0085F0_SMX_ACTION_ENA 0xEFFFFFFF -#define S_0085F0_CR0_ACTION_ENA(x) (((x) & 0x1) << 29) -#define G_0085F0_CR0_ACTION_ENA(x) (((x) >> 29) & 0x1) -#define C_0085F0_CR0_ACTION_ENA 0xDFFFFFFF -#define S_0085F0_CR1_ACTION_ENA(x) (((x) & 0x1) << 30) -#define G_0085F0_CR1_ACTION_ENA(x) (((x) >> 30) & 0x1) -#define C_0085F0_CR1_ACTION_ENA 0xBFFFFFFF -#define S_0085F0_CR2_ACTION_ENA(x) (((x) & 0x1) << 31) -#define G_0085F0_CR2_ACTION_ENA(x) (((x) >> 31) & 0x1) -#define C_0085F0_CR2_ACTION_ENA 0x7FFFFFFF - - -#define R_02812C_CB_CLEAR_ALPHA 0x02812C -#define S_02812C_CLEAR_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02812C_CLEAR_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02812C_CLEAR_ALPHA 0x00000000 -#define R_028128_CB_CLEAR_BLUE 0x028128 -#define S_028128_CLEAR_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028128_CLEAR_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028128_CLEAR_BLUE 0x00000000 -#define R_028124_CB_CLEAR_GREEN 0x028124 -#define S_028124_CLEAR_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028124_CLEAR_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028124_CLEAR_GREEN 0x00000000 -#define R_028120_CB_CLEAR_RED 0x028120 -#define S_028120_CLEAR_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028120_CLEAR_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028120_CLEAR_RED 0x00000000 -#define R_02842C_CB_FOG_BLUE 0x02842C -#define S_02842C_FOG_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02842C_FOG_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02842C_FOG_BLUE 0x00000000 -#define R_028428_CB_FOG_GREEN 0x028428 -#define S_028428_FOG_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028428_FOG_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028428_FOG_GREEN 0x00000000 -#define R_028424_CB_FOG_RED 0x028424 -#define S_028424_FOG_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028424_FOG_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028424_FOG_RED 0x00000000 -#define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000 -#define 
S_03C000_CLAMP_X(x) (((x) & 0x7) << 0) -#define G_03C000_CLAMP_X(x) (((x) >> 0) & 0x7) -#define C_03C000_CLAMP_X 0xFFFFFFF8 -#define S_03C000_CLAMP_Y(x) (((x) & 0x7) << 3) -#define G_03C000_CLAMP_Y(x) (((x) >> 3) & 0x7) -#define C_03C000_CLAMP_Y 0xFFFFFFC7 -#define S_03C000_CLAMP_Z(x) (((x) & 0x7) << 6) -#define G_03C000_CLAMP_Z(x) (((x) >> 6) & 0x7) -#define C_03C000_CLAMP_Z 0xFFFFFE3F -#define S_03C000_XY_MAG_FILTER(x) (((x) & 0x7) << 9) -#define G_03C000_XY_MAG_FILTER(x) (((x) >> 9) & 0x7) -#define C_03C000_XY_MAG_FILTER 0xFFFFF1FF -#define S_03C000_XY_MIN_FILTER(x) (((x) & 0x7) << 12) -#define G_03C000_XY_MIN_FILTER(x) (((x) >> 12) & 0x7) -#define C_03C000_XY_MIN_FILTER 0xFFFF8FFF -#define S_03C000_Z_FILTER(x) (((x) & 0x3) << 15) -#define G_03C000_Z_FILTER(x) (((x) >> 15) & 0x3) -#define C_03C000_Z_FILTER 0xFFFE7FFF -#define S_03C000_MIP_FILTER(x) (((x) & 0x3) << 17) -#define G_03C000_MIP_FILTER(x) (((x) >> 17) & 0x3) -#define C_03C000_MIP_FILTER 0xFFF9FFFF -#define S_03C000_BORDER_COLOR_TYPE(x) (((x) & 0x3) << 22) -#define G_03C000_BORDER_COLOR_TYPE(x) (((x) >> 22) & 0x3) -#define C_03C000_BORDER_COLOR_TYPE 0xFF3FFFFF -#define S_03C000_POINT_SAMPLING_CLAMP(x) (((x) & 0x1) << 24) -#define G_03C000_POINT_SAMPLING_CLAMP(x) (((x) >> 24) & 0x1) -#define C_03C000_POINT_SAMPLING_CLAMP 0xFEFFFFFF -#define S_03C000_TEX_ARRAY_OVERRIDE(x) (((x) & 0x1) << 25) -#define G_03C000_TEX_ARRAY_OVERRIDE(x) (((x) >> 25) & 0x1) -#define C_03C000_TEX_ARRAY_OVERRIDE 0xFDFFFFFF -#define S_03C000_DEPTH_COMPARE_FUNCTION(x) (((x) & 0x7) << 26) -#define G_03C000_DEPTH_COMPARE_FUNCTION(x) (((x) >> 26) & 0x7) -#define C_03C000_DEPTH_COMPARE_FUNCTION 0xE3FFFFFF -#define S_03C000_CHROMA_KEY(x) (((x) & 0x3) << 29) -#define G_03C000_CHROMA_KEY(x) (((x) >> 29) & 0x3) -#define C_03C000_CHROMA_KEY 0x9FFFFFFF -#define S_03C000_LOD_USES_MINOR_AXIS(x) (((x) & 0x1) << 31) -#define G_03C000_LOD_USES_MINOR_AXIS(x) (((x) >> 31) & 0x1) -#define C_03C000_LOD_USES_MINOR_AXIS 0x7FFFFFFF -#define 
R_03C004_SQ_TEX_SAMPLER_WORD1_0 0x03C004 -#define S_03C004_MIN_LOD(x) (((x) & 0x3FF) << 0) -#define G_03C004_MIN_LOD(x) (((x) >> 0) & 0x3FF) -#define C_03C004_MIN_LOD 0xFFFFFC00 -#define S_03C004_MAX_LOD(x) (((x) & 0x3FF) << 10) -#define G_03C004_MAX_LOD(x) (((x) >> 10) & 0x3FF) -#define C_03C004_MAX_LOD 0xFFF003FF -#define S_03C004_LOD_BIAS(x) (((x) & 0xFFF) << 20) -#define G_03C004_LOD_BIAS(x) (((x) >> 20) & 0xFFF) -#define C_03C004_LOD_BIAS 0x000FFFFF -#define R_03C008_SQ_TEX_SAMPLER_WORD2_0 0x03C008 -#define S_03C008_LOD_BIAS_SEC(x) (((x) & 0xFFF) << 0) -#define G_03C008_LOD_BIAS_SEC(x) (((x) >> 0) & 0xFFF) -#define C_03C008_LOD_BIAS_SEC 0xFFFFF000 -#define S_03C008_MC_COORD_TRUNCATE(x) (((x) & 0x1) << 12) -#define G_03C008_MC_COORD_TRUNCATE(x) (((x) >> 12) & 0x1) -#define C_03C008_MC_COORD_TRUNCATE 0xFFFFEFFF -#define S_03C008_FORCE_DEGAMMA(x) (((x) & 0x1) << 13) -#define G_03C008_FORCE_DEGAMMA(x) (((x) >> 13) & 0x1) -#define C_03C008_FORCE_DEGAMMA 0xFFFFDFFF -#define S_03C008_HIGH_PRECISION_FILTER(x) (((x) & 0x1) << 14) -#define G_03C008_HIGH_PRECISION_FILTER(x) (((x) >> 14) & 0x1) -#define C_03C008_HIGH_PRECISION_FILTER 0xFFFFBFFF -#define S_03C008_PERF_MIP(x) (((x) & 0x7) << 15) -#define G_03C008_PERF_MIP(x) (((x) >> 15) & 0x7) -#define C_03C008_PERF_MIP 0xFFFC7FFF -#define S_03C008_PERF_Z(x) (((x) & 0x3) << 18) -#define G_03C008_PERF_Z(x) (((x) >> 18) & 0x3) -#define C_03C008_PERF_Z 0xFFF3FFFF -#define S_03C008_FETCH_4(x) (((x) & 0x1) << 26) -#define G_03C008_FETCH_4(x) (((x) >> 26) & 0x1) -#define C_03C008_FETCH_4 0xFBFFFFFF -#define S_03C008_SAMPLE_IS_PCF(x) (((x) & 0x1) << 27) -#define G_03C008_SAMPLE_IS_PCF(x) (((x) >> 27) & 0x1) -#define C_03C008_SAMPLE_IS_PCF 0xF7FFFFFF -#define S_03C008_TYPE(x) (((x) & 0x1) << 31) -#define G_03C008_TYPE(x) (((x) >> 31) & 0x1) -#define C_03C008_TYPE 0x7FFFFFFF -#define R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA 0x00A40C -#define S_00A40C_BORDER_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A40C_BORDER_ALPHA(x) (((x) >> 
0) & 0xFFFFFFFF) -#define C_00A40C_BORDER_ALPHA 0x00000000 -#define R_00A408_TD_PS_SAMPLER0_BORDER_BLUE 0x00A408 -#define S_00A408_BORDER_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A408_BORDER_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A408_BORDER_BLUE 0x00000000 -#define R_00A404_TD_PS_SAMPLER0_BORDER_GREEN 0x00A404 -#define S_00A404_BORDER_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A404_BORDER_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A404_BORDER_GREEN 0x00000000 -#define R_00A400_TD_PS_SAMPLER0_BORDER_RED 0x00A400 -#define S_00A400_BORDER_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A400_BORDER_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A400_BORDER_RED 0x00000000 -#define R_00A60C_TD_VS_SAMPLER0_BORDER_ALPHA 0x00A60C -#define S_00A60C_BORDER_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A60C_BORDER_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A60C_BORDER_ALPHA 0x00000000 -#define R_00A608_TD_VS_SAMPLER0_BORDER_BLUE 0x00A608 -#define S_00A608_BORDER_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A608_BORDER_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A608_BORDER_BLUE 0x00000000 -#define R_00A604_TD_VS_SAMPLER0_BORDER_GREEN 0x00A604 -#define S_00A604_BORDER_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A604_BORDER_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A604_BORDER_GREEN 0x00000000 -#define R_00A600_TD_VS_SAMPLER0_BORDER_RED 0x00A600 -#define S_00A600_BORDER_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A600_BORDER_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A600_BORDER_RED 0x00000000 -#define R_00A80C_TD_GS_SAMPLER0_BORDER_ALPHA 0x00A80C -#define S_00A80C_BORDER_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A80C_BORDER_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A80C_BORDER_ALPHA 0x00000000 -#define R_00A808_TD_GS_SAMPLER0_BORDER_BLUE 0x00A808 -#define S_00A808_BORDER_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A808_BORDER_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A808_BORDER_BLUE 0x00000000 -#define 
R_00A804_TD_GS_SAMPLER0_BORDER_GREEN 0x00A804 -#define S_00A804_BORDER_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A804_BORDER_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A804_BORDER_GREEN 0x00000000 -#define R_00A800_TD_GS_SAMPLER0_BORDER_RED 0x00A800 -#define S_00A800_BORDER_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A800_BORDER_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A800_BORDER_RED 0x00000000 -#define R_030000_SQ_ALU_CONSTANT0_0 0x030000 -#define S_030000_X(x) (((x) & 0xFFFFFFFF) << 0) -#define G_030000_X(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_030000_X 0x00000000 -#define R_030004_SQ_ALU_CONSTANT1_0 0x030004 -#define S_030004_Y(x) (((x) & 0xFFFFFFFF) << 0) -#define G_030004_Y(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_030004_Y 0x00000000 -#define R_030008_SQ_ALU_CONSTANT2_0 0x030008 -#define S_030008_Z(x) (((x) & 0xFFFFFFFF) << 0) -#define G_030008_Z(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_030008_Z 0x00000000 -#define R_03000C_SQ_ALU_CONSTANT3_0 0x03000C -#define S_03000C_W(x) (((x) & 0xFFFFFFFF) << 0) -#define G_03000C_W(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_03000C_W 0x00000000 -#define R_0287E4_VGT_DMA_BASE_HI 0x0287E4 -#define R_0287E8_VGT_DMA_BASE 0x0287E8 -#define R_028E20_PA_CL_UCP0_X 0x028E20 -#define R_028E24_PA_CL_UCP0_Y 0x028E24 -#define R_028E28_PA_CL_UCP0_Z 0x028E28 -#define R_028E2C_PA_CL_UCP0_W 0x028E2C -#define R_028E30_PA_CL_UCP1_X 0x028E30 -#define R_028E34_PA_CL_UCP1_Y 0x028E34 -#define R_028E38_PA_CL_UCP1_Z 0x028E38 -#define R_028E3C_PA_CL_UCP1_W 0x028E3C -#define R_028E40_PA_CL_UCP2_X 0x028E40 -#define R_028E44_PA_CL_UCP2_Y 0x028E44 -#define R_028E48_PA_CL_UCP2_Z 0x028E48 -#define R_028E4C_PA_CL_UCP2_W 0x028E4C -#define R_028E50_PA_CL_UCP3_X 0x028E50 -#define R_028E54_PA_CL_UCP3_Y 0x028E54 -#define R_028E58_PA_CL_UCP3_Z 0x028E58 -#define R_028E5C_PA_CL_UCP3_W 0x028E5C -#define R_028E60_PA_CL_UCP4_X 0x028E60 -#define R_028E64_PA_CL_UCP4_Y 0x028E64 -#define R_028E68_PA_CL_UCP4_Z 0x028E68 -#define R_028E6C_PA_CL_UCP4_W 0x028E6C 
-#define R_028E70_PA_CL_UCP5_X 0x028E70 -#define R_028E74_PA_CL_UCP5_Y 0x028E74 -#define R_028E78_PA_CL_UCP5_Z 0x028E78 -#define R_028E7C_PA_CL_UCP5_W 0x028E7C -#define R_038000_RESOURCE0_WORD0 0x038000 -#define R_038004_RESOURCE0_WORD1 0x038004 -#define R_038008_RESOURCE0_WORD2 0x038008 -#define R_03800C_RESOURCE0_WORD3 0x03800C -#define R_038010_RESOURCE0_WORD4 0x038010 -#define R_038014_RESOURCE0_WORD5 0x038014 -#define R_038018_RESOURCE0_WORD6 0x038018 - -#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140 -#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180 -#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940 -#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980 - -#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 -#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 - -#endif From 233dd4953e3e6cf39f3c7a7cd898339a89d2ff86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 10 Aug 2011 02:58:40 +0200 Subject: [PATCH 342/600] u_blitter: rename util_blitter_copy_region -> util_blitter_copy_texture --- src/gallium/auxiliary/util/u_blitter.c | 16 ++++++++-------- src/gallium/auxiliary/util/u_blitter.h | 21 ++++++++++++--------- src/gallium/drivers/i915/i915_surface.c | 2 +- src/gallium/drivers/nvfx/nvfx_surface.c | 2 +- src/gallium/drivers/r300/r300_blit.c | 4 ++-- src/gallium/drivers/r600/r600_blit.c | 4 ++-- 6 files changed, 26 insertions(+), 23 deletions(-) diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 528f344a0f7..7ecb76f38f2 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -724,14 +724,14 @@ boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2, return sx1 < dx2 && sx2 > dx1 && sy1 < dy2 && sy2 > dy1; } -void util_blitter_copy_region(struct blitter_context *blitter, - struct pipe_resource *dst, - unsigned dstlevel, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned srclevel, - const 
struct pipe_box *srcbox, - boolean ignore_stencil) +void util_blitter_copy_texture(struct blitter_context *blitter, + struct pipe_resource *dst, + unsigned dstlevel, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned srclevel, + const struct pipe_box *srcbox, + boolean ignore_stencil) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 41470d92bba..df6f023a638 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -126,12 +126,15 @@ struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter) } /* - * These CSOs must be saved before any of the following functions is called: + * These states must be saved before any of the following functions is called: * - blend state * - depth stencil alpha state * - rasterizer state * - vertex shader + * - any other shader??? (XXX) * - fragment shader + * - vertex buffers + * - vertex elements */ /** @@ -169,14 +172,14 @@ void util_blitter_clear_depth_custom(struct blitter_context *blitter, * - fragment sampler states * - fragment sampler textures */ -void util_blitter_copy_region(struct blitter_context *blitter, - struct pipe_resource *dst, - unsigned dstlevel, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned srclevel, - const struct pipe_box *srcbox, - boolean ignore_stencil); +void util_blitter_copy_texture(struct blitter_context *blitter, + struct pipe_resource *dst, + unsigned dstlevel, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned srclevel, + const struct pipe_box *srcbox, + boolean ignore_stencil); /** * Clear a region of a (color) surface to a constant value. 
diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index ac6e94500c8..41146be9311 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -80,7 +80,7 @@ i915_surface_copy_render(struct pipe_context *pipe, i915->saved_nr_sampler_views, i915->saved_sampler_views); - util_blitter_copy_region(i915->blitter, dst, dst_level, dstx, dsty, dstz, + util_blitter_copy_texture(i915->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box, TRUE); } diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index 339906e6a63..04b0304b44f 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -288,7 +288,7 @@ nvfx_resource_copy_region(struct pipe_context *pipe, * TODO: perhaps support reinterpreting the formats */ struct blitter_context* blitter = nvfx_get_blitter(pipe, 1); - util_blitter_copy_region(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE); + util_blitter_copy_texture(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE); nvfx_put_blitter(pipe, blitter); } else diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index e7a926829d1..ddf5448a34b 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -445,8 +445,8 @@ static void r300_hw_copy_region(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300_blitter_begin(r300, R300_COPY); - util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box, TRUE); + util_blitter_copy_texture(r300->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); r300_blitter_end(r300); } diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 35e68b6e222..e1cf585234e 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ 
b/src/gallium/drivers/r600/r600_blit.c @@ -233,8 +233,8 @@ static void r600_hw_copy_region(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_blitter_begin(ctx, R600_COPY); - util_blitter_copy_region(rctx->blitter, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box, TRUE); + util_blitter_copy_texture(rctx->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); r600_blitter_end(ctx); } From 363295d7209636c56ee0cb1246915b03b7f73524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 15 Aug 2011 20:52:44 +0200 Subject: [PATCH 343/600] u_blitter: restore some states conditionally --- src/gallium/auxiliary/util/u_blitter.c | 29 +++++++++++++++----------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 7ecb76f38f2..d8e46f07c88 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -26,8 +26,8 @@ /** * @file - * Blitter utility to facilitate acceleration of the clear, clear_render_target, clear_depth_stencil - * resource_copy_region functions. + * Blitter utility to facilitate acceleration of the clear, clear_render_target, + * clear_depth_stencil, and resource_copy_region functions. 
* * @author Marek Olšák */ @@ -197,8 +197,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) memset(&velem[0], 0, sizeof(velem[0]) * 2); for (i = 0; i < 2; i++) { velem[i].src_offset = i * 4 * sizeof(float); - velem[i].instance_divisor = 0; - velem[i].vertex_buffer_index = 0; velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; } ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]); @@ -288,26 +286,33 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) unsigned i; /* restore the state objects which are always required to be saved */ - pipe->bind_blend_state(pipe, ctx->base.saved_blend_state); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state); pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state); - pipe->bind_fs_state(pipe, ctx->base.saved_fs); pipe->bind_vs_state(pipe, ctx->base.saved_vs); pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state); - ctx->base.saved_blend_state = INVALID_PTR; - ctx->base.saved_dsa_state = INVALID_PTR; ctx->base.saved_rs_state = INVALID_PTR; - ctx->base.saved_fs = INVALID_PTR; ctx->base.saved_vs = INVALID_PTR; ctx->base.saved_velem_state = INVALID_PTR; + /* restore the state objects which are required to be saved for clear/copy + */ + if (ctx->base.saved_blend_state != INVALID_PTR) { + pipe->bind_blend_state(pipe, ctx->base.saved_blend_state); + ctx->base.saved_blend_state = INVALID_PTR; + } + if (ctx->base.saved_dsa_state != INVALID_PTR) { + pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state); + ctx->base.saved_dsa_state = INVALID_PTR; + } + if (ctx->base.saved_fs != INVALID_PTR) { + pipe->bind_fs_state(pipe, ctx->base.saved_fs); + ctx->base.saved_fs = INVALID_PTR; + } + pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref); pipe->set_viewport_state(pipe, &ctx->base.saved_viewport); pipe->set_clip_state(pipe, &ctx->base.saved_clip); - /* restore the state objects which are required to be saved before 
copy/fill - */ if (ctx->base.saved_fb_state.nr_cbufs != ~0) { pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state); util_unreference_framebuffer_state(&ctx->base.saved_fb_state); From 21c5d11b7ee1f6fd9d16752d8921976d9951623d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 15 Aug 2011 19:37:33 +0200 Subject: [PATCH 344/600] noop: redirect the get_param/is_format.. queries to the underlying driver --- src/gallium/drivers/noop/noop_pipe.c | 119 +++------------------------ 1 file changed, 11 insertions(+), 108 deletions(-) diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c index 18308b796f3..ffc444e37d1 100644 --- a/src/gallium/drivers/noop/noop_pipe.c +++ b/src/gallium/drivers/noop/noop_pipe.c @@ -324,131 +324,34 @@ static const char *noop_get_name(struct pipe_screen* pscreen) static int noop_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { - switch (param) { - /* Supported features (boolean caps). */ - case PIPE_CAP_NPOT_TEXTURES: - case PIPE_CAP_TWO_SIDED_STENCIL: - case PIPE_CAP_GLSL: - case PIPE_CAP_OCCLUSION_QUERY: - case PIPE_CAP_POINT_SPRITE: - case PIPE_CAP_ANISOTROPIC_FILTER: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - case PIPE_CAP_TEXTURE_SHADOW_MAP: - case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; - return 1; - case PIPE_CAP_DUAL_SOURCE_BLEND: - - case PIPE_CAP_SM3: - case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - case PIPE_CAP_DEPTH_CLAMP: - case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_TIMER_QUERY: - case PIPE_CAP_STREAM_OUTPUT: - case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_INDEP_BLEND_FUNC: - return 0; - - /* Texturing. 
*/ - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 14; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 16; - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - - /* Render targets. */ - case PIPE_CAP_MAX_RENDER_TARGETS: - return 8; - - /* Fragment coordinate conventions. */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - - default: - return 0; - } + return screen->get_param(screen, param); } static float noop_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) { - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - default: - return 0.0f; - } + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; + + return screen->get_paramf(screen, param); } static int noop_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) { - switch(shader) - { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_GEOMETRY: - break; - default: - return 0; - } + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; - switch (param) { - case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: - return 16384; - case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - return 8; - case PIPE_SHADER_CAP_MAX_INPUTS: - return 16; - case PIPE_SHADER_CAP_MAX_TEMPS: - return 256; - case PIPE_SHADER_CAP_MAX_ADDRS: - return 1; - case PIPE_SHADER_CAP_MAX_CONSTS: 
- return 256; - case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; - case PIPE_SHADER_CAP_MAX_PREDS: - return 0; - case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 1; - case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: - case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; - default: - return 0; - } + return screen->get_shader_param(screen, shader, param); } -static boolean noop_is_format_supported(struct pipe_screen* screen, +static boolean noop_is_format_supported(struct pipe_screen* pscreen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, unsigned usage) { - return true; + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; + + return screen->is_format_supported(screen, format, target, sample_count, usage); } static void noop_destroy_screen(struct pipe_screen *screen) From e3be51311834217cf35be9186e7dc9f57a10d44b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 15 Aug 2011 23:37:44 +0200 Subject: [PATCH 345/600] r600g: expose ARB_ES2_compatibility by claiming fixed-point format support I also needed to make some changes in u_vbuf_mgr in order to override the caps from the driver and enable the fallback even though the driver claims the format is supported. 
--- src/gallium/auxiliary/util/u_vbuf_mgr.c | 46 ++++++++----------------- src/gallium/auxiliary/util/u_vbuf_mgr.h | 17 +++++++++ src/gallium/drivers/r600/r600_formats.h | 1 - src/gallium/drivers/r600/r600_pipe.c | 1 + 4 files changed, 33 insertions(+), 32 deletions(-) diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c index 19eb689cfb2..d9b39e528bb 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.c +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -34,21 +34,6 @@ #include "translate/translate.h" #include "translate/translate_cache.h" -/* Hardware vertex fetcher limitations can be described by this structure. */ -struct u_vbuf_caps { - /* Vertex format CAPs. */ - /* TRUE if hardware supports it. */ - unsigned format_fixed32:1; /* PIPE_FORMAT_*32*_FIXED */ - unsigned format_float16:1; /* PIPE_FORMAT_*16*_FLOAT */ - unsigned format_float64:1; /* PIPE_FORMAT_*64*_FLOAT */ - unsigned format_norm32:1; /* PIPE_FORMAT_*32*NORM */ - unsigned format_scaled32:1; /* PIPE_FORMAT_*32*SCALED */ - - /* Whether vertex fetches don't have to be dword-aligned. */ - /* TRUE if hardware supports it. 
*/ - unsigned fetch_dword_unaligned:1; -}; - struct u_vbuf_mgr_elements { unsigned count; struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; @@ -69,7 +54,6 @@ struct u_vbuf_mgr_elements { struct u_vbuf_mgr_priv { struct u_vbuf_mgr b; - struct u_vbuf_caps caps; struct pipe_context *pipe; struct translate_cache *translate_cache; @@ -89,25 +73,25 @@ static void u_vbuf_mgr_init_format_caps(struct u_vbuf_mgr_priv *mgr) { struct pipe_screen *screen = mgr->pipe->screen; - mgr->caps.format_fixed32 = + mgr->b.caps.format_fixed32 = screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_float16 = + mgr->b.caps.format_float16 = screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_float64 = + mgr->b.caps.format_float64 = screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_norm32 = + mgr->b.caps.format_norm32 = screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER) && screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_scaled32 = + mgr->b.caps.format_scaled32 = screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER) && screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER, @@ -130,7 +114,7 @@ u_vbuf_mgr_create(struct pipe_context *pipe, upload_buffer_alignment, upload_buffer_bind); - mgr->caps.fetch_dword_unaligned = + mgr->b.caps.fetch_dword_unaligned = fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED; u_vbuf_mgr_init_format_caps(mgr); @@ -184,7 +168,7 @@ u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr, /* Check for support. 
*/ if (mgr->ve->ve[i].src_format == mgr->ve->native_format[i] && - (mgr->caps.fetch_dword_unaligned || + (mgr->b.caps.fetch_dword_unaligned || (vb->buffer_offset % 4 == 0 && vb->stride % 4 == 0 && mgr->ve->ve[i].src_offset % 4 == 0))) { @@ -365,7 +349,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, /* Choose a native format. * For now we don't care about the alignment, that's going to * be sorted out later. */ - if (!mgr->caps.format_fixed32) { + if (!mgr->b.caps.format_fixed32) { switch (format) { FORMAT_REPLACE(R32_FIXED, R32_FLOAT); FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT); @@ -374,7 +358,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_float16) { + if (!mgr->b.caps.format_float16) { switch (format) { FORMAT_REPLACE(R16_FLOAT, R32_FLOAT); FORMAT_REPLACE(R16G16_FLOAT, R32G32_FLOAT); @@ -383,7 +367,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_float64) { + if (!mgr->b.caps.format_float64) { switch (format) { FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); @@ -392,7 +376,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_norm32) { + if (!mgr->b.caps.format_norm32) { switch (format) { FORMAT_REPLACE(R32_UNORM, R32_FLOAT); FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); @@ -405,7 +389,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_scaled32) { + if (!mgr->b.caps.format_scaled32) { switch (format) { FORMAT_REPLACE(R32_USCALED, R32_FLOAT); FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); @@ -427,11 +411,11 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, ve->incompatible_layout = ve->incompatible_layout || ve->ve[i].src_format != ve->native_format[i] || - (!mgr->caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0); + (!mgr->b.caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0); } /* 
Align the formats to the size of DWORD if needed. */ - if (!mgr->caps.fetch_dword_unaligned) { + if (!mgr->b.caps.fetch_dword_unaligned) { for (i = 0; i < count; i++) { ve->native_format_size[i] = align(ve->native_format_size[i], 4); } @@ -472,7 +456,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb, mgr->any_user_vbs = FALSE; mgr->incompatible_vb_layout = FALSE; - if (!mgr->caps.fetch_dword_unaligned) { + if (!mgr->b.caps.fetch_dword_unaligned) { /* Check if the strides and offsets are aligned to the size of DWORD. */ for (i = 0; i < count; i++) { if (bufs[i].buffer) { diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h index 4e6372435d8..c653ca4346d 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.h +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h @@ -37,6 +37,21 @@ #include "pipe/p_state.h" #include "util/u_transfer.h" +/* Hardware vertex fetcher limitations can be described by this structure. */ +struct u_vbuf_caps { + /* Vertex format CAPs. */ + /* TRUE if hardware supports it. */ + unsigned format_fixed32:1; /* PIPE_FORMAT_*32*_FIXED */ + unsigned format_float16:1; /* PIPE_FORMAT_*16*_FLOAT */ + unsigned format_float64:1; /* PIPE_FORMAT_*64*_FLOAT */ + unsigned format_norm32:1; /* PIPE_FORMAT_*32*NORM */ + unsigned format_scaled32:1; /* PIPE_FORMAT_*32*SCALED */ + + /* Whether vertex fetches don't have to be dword-aligned. */ + /* TRUE if hardware supports it. */ + unsigned fetch_dword_unaligned:1; +}; + /* The manager. * This structure should also be used to access vertex buffers * from a driver. 
*/ @@ -63,6 +78,8 @@ struct u_vbuf_mgr { * - u_upload_buffer * - u_upload_flush */ struct u_upload_mgr *uploader; + + struct u_vbuf_caps caps; }; struct u_vbuf_resource { diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h index 1c1089d89d2..b822cba9293 100644 --- a/src/gallium/drivers/r600/r600_formats.h +++ b/src/gallium/drivers/r600/r600_formats.h @@ -99,7 +99,6 @@ static INLINE bool r600_is_vertex_format_supported(enum pipe_format format) /* No fixed, no double. */ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || - desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || (desc->channel[i].size == 64 && desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) return false; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 5d09d59e111..4cf02c9b18e 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -273,6 +273,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_destroy_context(&rctx->context); return NULL; } + rctx->vbuf_mgr->caps.format_fixed32 = 0; rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) { From f272117def7a7962f355581d038af6d0981a23cb Mon Sep 17 00:00:00 2001 From: Cooper Yuan Date: Tue, 16 Aug 2011 20:37:13 +0800 Subject: [PATCH 346/600] dri2: check if context is valid before flushing the pipe --- src/gallium/state_trackers/dri/drm/dri2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 908a735234e..53638da9888 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -49,7 +49,8 @@ dri2_flush_drawable(__DRIdrawable *draw) struct dri_drawable *drawable = dri_drawable(draw); struct dri_context *ctx = dri_get_current(draw->driScreenPriv); - ctx->st->flush(ctx->st, 0, NULL); + if (ctx) + ctx->st->flush(ctx->st, 0, 
NULL); } static void From 61d2dfbe488cf5de5881c20fe1ead97f2ab5dabb Mon Sep 17 00:00:00 2001 From: Benjamin Franzke Date: Tue, 16 Aug 2011 19:23:18 +0200 Subject: [PATCH 347/600] egl: Add include paths for platform autodetection Needed since commit 85fe9484. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=40145 --- src/egl/main/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/egl/main/Makefile b/src/egl/main/Makefile index c100fbfeb9c..3172ad2ec03 100644 --- a/src/egl/main/Makefile +++ b/src/egl/main/Makefile @@ -63,6 +63,7 @@ EGL_LIB_DEPS += $(XCB_DRI2_LIBS) endif ifneq ($(findstring drm, $(EGL_PLATFORMS)),) EGL_LIB_DEPS += -lgbm +INCLUDE_DIRS += -I$(TOP)/src/gbm/main endif EGL_LIB_DEPS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) $(LIBDRM_LIB) $(WAYLAND_LIBS) endif @@ -70,6 +71,7 @@ endif ifneq ($(findstring wayland, $(EGL_PLATFORMS)),) LOCAL_LIBS += $(TOP)/src/egl/wayland/wayland-drm/libwayland-drm.a +INCLUDE_DIRS += $(WAYLAND_CFLAGS) endif ifeq ($(filter glx, $(EGL_DRIVERS_DIRS)),glx) From 4a47662beaa2092447939db7880531fb706afedd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 16 Aug 2011 19:35:10 +0200 Subject: [PATCH 348/600] r600g: rename bc -> bytecode It took me a while to figure out what it stands for. 
--- src/gallium/drivers/r600/eg_asm.c | 2 +- src/gallium/drivers/r600/r600_asm.c | 268 +++++------ src/gallium/drivers/r600/r600_asm.h | 68 +-- src/gallium/drivers/r600/r600_shader.c | 624 ++++++++++++------------- src/gallium/drivers/r600/r600_shader.h | 2 +- src/gallium/drivers/r600/r700_asm.c | 4 +- 6 files changed, 484 insertions(+), 484 deletions(-) diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index c95872b0809..ca25b341ffd 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -29,7 +29,7 @@ #include "r600_opcodes.h" #include "evergreend.h" -int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) { unsigned id = cf->id; diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 6092432e6f2..0311b562f27 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -36,7 +36,7 @@ #define NUM_OF_CYCLES 3 #define NUM_OF_COMPONENTS 4 -static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu) +static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { if(alu->is_op3) return 3; @@ -152,11 +152,11 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r return 3; } -int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); +int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); -static struct r600_bc_cf *r600_bc_cf(void) +static struct r600_bytecode_cf *r600_bytecode_cf(void) { - struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf); + struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf); if (cf == NULL) return NULL; @@ -167,9 +167,9 @@ static struct r600_bc_cf *r600_bc_cf(void) return cf; } -static struct r600_bc_alu *r600_bc_alu(void) +static struct 
r600_bytecode_alu *r600_bytecode_alu(void) { - struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu); + struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu); if (alu == NULL) return NULL; @@ -177,9 +177,9 @@ static struct r600_bc_alu *r600_bc_alu(void) return alu; } -static struct r600_bc_vtx *r600_bc_vtx(void) +static struct r600_bytecode_vtx *r600_bytecode_vtx(void) { - struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx); + struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx); if (vtx == NULL) return NULL; @@ -187,9 +187,9 @@ static struct r600_bc_vtx *r600_bc_vtx(void) return vtx; } -static struct r600_bc_tex *r600_bc_tex(void) +static struct r600_bytecode_tex *r600_bytecode_tex(void) { - struct r600_bc_tex *tex = CALLOC_STRUCT(r600_bc_tex); + struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex); if (tex == NULL) return NULL; @@ -197,15 +197,15 @@ static struct r600_bc_tex *r600_bc_tex(void) return tex; } -void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class) +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class) { LIST_INITHEAD(&bc->cf); bc->chip_class = chip_class; } -static int r600_bc_add_cf(struct r600_bc *bc) +static int r600_bytecode_add_cf(struct r600_bytecode *bc) { - struct r600_bc_cf *cf = r600_bc_cf(); + struct r600_bytecode_cf *cf = r600_bytecode_cf(); if (cf == NULL) return -ENOMEM; @@ -219,7 +219,7 @@ static int r600_bc_add_cf(struct r600_bc *bc) return 0; } -int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) +int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output) { int r; @@ -254,16 +254,16 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) } } - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) return r; bc->cf_last->inst = output->inst; - memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output)); + memcpy(&bc->cf_last->output, output, sizeof(struct 
r600_bytecode_output)); return 0; } /* alu instructions that can ony exits once per group */ -static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -344,7 +344,7 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } -static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -365,7 +365,7 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } -static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_cube_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -380,7 +380,7 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } -static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -398,7 +398,7 @@ static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } /* alu instructions that can only execute on the vector unit */ -static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { return is_alu_reduction_inst(bc, alu) || is_alu_mova_inst(bc, alu) || @@ -407,7 +407,7 @@ static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } /* alu instructions that can only execute on the trans unit */ -static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -478,23 +478,23 @@ static int 
is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } /* alu instructions that can execute on any unit */ -static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { return !is_alu_vec_unit_inst(bc, alu) && !is_alu_trans_unit_inst(bc, alu); } -static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, - struct r600_bc_alu *assignment[5]) +static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first, + struct r600_bytecode_alu *assignment[5]) { - struct r600_bc_alu *alu; + struct r600_bytecode_alu *alu; unsigned i, chan, trans; int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) assignment[i] = NULL; - for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) { + for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) { chan = alu->dst.chan; if (max_slots == 4) trans = 0; @@ -573,7 +573,7 @@ static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, return 0; } -static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) +static int reserve_cfile(struct r600_bytecode *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) { int res, num_res = 4; if (bc->chip_class >= R700) { @@ -615,12 +615,12 @@ static int is_const(int sel) sel <= V_SQ_ALU_SRC_LITERAL); } -static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu, +static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle) { int r, src, num_src, sel, elem, cycle; - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (src = 0; src < num_src; src++) { sel = alu->src[src].sel; elem = alu->src[src].chan; @@ -645,12 +645,12 @@ static int check_vector(struct 
r600_bc *bc, struct r600_bc_alu *alu, return 0; } -static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, +static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle) { int r, src, num_src, const_count, sel, elem, cycle; - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (const_count = 0, src = 0; src < num_src; ++src) { sel = alu->src[src].sel; elem = alu->src[src].chan; @@ -691,8 +691,8 @@ static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, return 0; } -static int check_and_set_bank_swizzle(struct r600_bc *bc, - struct r600_bc_alu *slots[5]) +static int check_and_set_bank_swizzle(struct r600_bytecode *bc, + struct r600_bytecode_alu *slots[5]) { struct alu_bank_swizzle bs; int bank_swizzle[5]; @@ -764,10 +764,10 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, return -1; } -static int replace_gpr_with_pv_ps(struct r600_bc *bc, - struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev) +static int replace_gpr_with_pv_ps(struct r600_bytecode *bc, + struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev) { - struct r600_bc_alu *prev[5]; + struct r600_bytecode_alu *prev[5]; int gpr[5], chan[5]; int i, j, r, src, num_src; int max_slots = bc->chip_class == CAYMAN ? 
4 : 5; @@ -789,11 +789,11 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, } for (i = 0; i < max_slots; ++i) { - struct r600_bc_alu *alu = slots[i]; + struct r600_bytecode_alu *alu = slots[i]; if(!alu) continue; - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (src = 0; src < num_src; ++src) { if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) continue; @@ -821,7 +821,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, return 0; } -void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) +void r600_bytecode_special_constants(u32 value, unsigned *sel, unsigned *neg) { switch(value) { case 0: @@ -854,10 +854,10 @@ void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) } /* compute how many literal are needed */ -static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, +static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, uint32_t literal[4], unsigned *nliteral) { - unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned num_src = r600_bytecode_get_num_operands(bc, alu); unsigned i, j; for (i = 0; i < num_src; ++i) { @@ -880,11 +880,11 @@ static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, return 0; } -static void r600_bc_alu_adjust_literals(struct r600_bc *bc, - struct r600_bc_alu *alu, +static void r600_bytecode_alu_adjust_literals(struct r600_bytecode *bc, + struct r600_bytecode_alu *alu, uint32_t literal[4], unsigned nliteral) { - unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned num_src = r600_bytecode_get_num_operands(bc, alu); unsigned i, j; for (i = 0; i < num_src; ++i) { @@ -900,11 +900,11 @@ static void r600_bc_alu_adjust_literals(struct r600_bc *bc, } } -static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], - struct r600_bc_alu *alu_prev) +static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu 
*slots[5], + struct r600_bytecode_alu *alu_prev) { - struct r600_bc_alu *prev[5]; - struct r600_bc_alu *result[5] = { NULL }; + struct r600_bytecode_alu *prev[5]; + struct r600_bytecode_alu *result[5] = { NULL }; uint32_t literal[4], prev_literal[4]; unsigned nliteral = 0, prev_nliteral = 0; @@ -919,13 +919,13 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], return r; for (i = 0; i < max_slots; ++i) { - struct r600_bc_alu *alu; + struct r600_bytecode_alu *alu; /* check number of literals */ if (prev[i]) { - if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral)) + if (r600_bytecode_alu_nliterals(bc, prev[i], literal, &nliteral)) return 0; - if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral)) + if (r600_bytecode_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral)) return 0; if (is_alu_mova_inst(bc, prev[i])) { if (have_rel) @@ -934,7 +934,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } num_once_inst += is_alu_once_inst(bc, prev[i]); } - if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral)) + if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral)) return 0; /* Let's check used slots. 
*/ @@ -970,7 +970,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } /* Let's check source gprs */ - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (src = 0; src < num_src; ++src) { if (alu->src[src].rel) { if (have_mova) @@ -1020,7 +1020,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } /* determine new last instruction */ - LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1; + LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1; /* determine new first instruction */ for (i = 0; i < max_slots; ++i) { @@ -1040,9 +1040,9 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], * probably do slightly better by recognizing that we actually have two * consecutive lines of 16 constants, but the resulting code would also be * somewhat more complicated. */ -static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type) +static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, int type) { - struct r600_bc_kcache *kcache = bc->cf_last->kcache; + struct r600_bytecode_kcache *kcache = bc->cf_last->kcache; unsigned int required_lines; unsigned int free_lines = 0; unsigned int cache_line[3]; @@ -1095,7 +1095,7 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al /* Start a new ALU clause if needed. 
*/ if (required_lines > free_lines) { - if ((r = r600_bc_add_cf(bc))) { + if ((r = r600_bytecode_add_cf(bc))) { return r; } bc->cf_last->inst = (type << 3); @@ -1150,15 +1150,15 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al return 0; } -int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type) +int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type) { - struct r600_bc_alu *nalu = r600_bc_alu(); - struct r600_bc_alu *lalu; + struct r600_bytecode_alu *nalu = r600_bytecode_alu(); + struct r600_bytecode_alu *lalu; int i, r; if (nalu == NULL) return -ENOMEM; - memcpy(nalu, alu, sizeof(struct r600_bc_alu)); + memcpy(nalu, alu, sizeof(struct r600_bytecode_alu)); if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) { /* check if we could add it anyway */ @@ -1176,7 +1176,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || bc->force_add_cf) { - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) { free(nalu); return r; @@ -1186,7 +1186,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int /* Setup the kcache for this ALU instruction. This will start a new * ALU clause if needed. 
*/ - if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) { + if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) { free(nalu); return r; } @@ -1200,7 +1200,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int bc->ngpr = nalu->src[i].sel + 1; } if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) - r600_bc_special_constants(nalu->src[i].value, + r600_bytecode_special_constants(nalu->src[i].value, &nalu->src[i].sel, &nalu->src[i].neg); } if (nalu->dst.sel >= bc->ngpr) { @@ -1215,7 +1215,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (nalu->last) { uint32_t literal[4]; unsigned nliteral; - struct r600_bc_alu *slots[5]; + struct r600_bytecode_alu *slots[5]; int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); if (r) @@ -1239,7 +1239,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int for (i = 0, nliteral = 0; i < max_slots; i++) { if (slots[i]) { - r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral); + r = r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral); if (r) return r; } @@ -1259,12 +1259,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int return 0; } -int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) +int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu) { - return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + return r600_bytecode_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); } -static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) +static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc) { switch (bc->chip_class) { case R600: @@ -1283,7 +1283,7 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) } } -static inline boolean 
last_inst_was_vtx_fetch(struct r600_bc *bc) +static inline boolean last_inst_was_vtx_fetch(struct r600_bytecode *bc) { if (bc->chip_class == CAYMAN) { if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC) @@ -1296,20 +1296,20 @@ static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) return FALSE; } -int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) +int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) { - struct r600_bc_vtx *nvtx = r600_bc_vtx(); + struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx(); int r; if (nvtx == NULL) return -ENOMEM; - memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx)); + memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx)); /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || last_inst_was_vtx_fetch(bc) || bc->force_add_cf) { - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) { free(nvtx); return r; @@ -1323,24 +1323,24 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) + if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } -int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) +int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex) { - struct r600_bc_tex *ntex = r600_bc_tex(); + struct r600_bytecode_tex *ntex = r600_bytecode_tex(); int r; if (ntex == NULL) return -ENOMEM; - memcpy(ntex, tex, sizeof(struct r600_bc_tex)); + memcpy(ntex, tex, sizeof(struct r600_bytecode_tex)); /* we can't fetch data und use it as texture lookup address in the same TEX clause */ if (bc->cf_last != NULL && bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { - struct r600_bc_tex *ttex; + struct r600_bytecode_tex *ttex; LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { if 
(ttex->dst_gpr == ntex->src_gpr) { bc->force_add_cf = 1; @@ -1356,7 +1356,7 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) if (bc->cf_last == NULL || bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || bc->force_add_cf) { - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) { free(ntex); return r; @@ -1373,15 +1373,15 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) + if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } -int r600_bc_add_cfinst(struct r600_bc *bc, int inst) +int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst) { int r; - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) return r; @@ -1390,13 +1390,13 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) return 0; } -int cm_bc_add_cf_end(struct r600_bc *bc) +int cm_bytecode_add_cf_end(struct r600_bytecode *bc) { - return r600_bc_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END); + return r600_bytecode_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END); } /* common to all 3 families */ -static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) +static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id) { bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | @@ -1425,7 +1425,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign } /* common to all 3 families */ -static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id) +static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) { bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | 
@@ -1455,7 +1455,7 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign } /* r600 only, r700/eg bits in r700_asm.c */ -static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) { /* don't replace gpr by pv or ps for destination register */ bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | @@ -1496,7 +1496,7 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign return 0; } -static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) { *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) | @@ -1505,7 +1505,7 @@ static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf } /* common for r600/r700 - eg in eg_asm.c */ -static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) { unsigned id = cf->id; @@ -1531,9 +1531,9 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: if (bc->chip_class == R700) - r700_bc_cf_vtx_build(&bc->bytecode[id], cf); + r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf); else - r600_bc_cf_vtx_build(&bc->bytecode[id], cf); + r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf); break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: @@ -1573,12 +1573,12 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) return 0; } -int r600_bc_build(struct r600_bc *bc) +int r600_bytecode_build(struct r600_bytecode *bc) { - struct r600_bc_cf *cf; - struct r600_bc_alu *alu; - struct r600_bc_vtx *vtx; - struct 
r600_bc_tex *tex; + struct r600_bytecode_cf *cf; + struct r600_bytecode_alu *alu; + struct r600_bytecode_vtx *vtx; + struct r600_bytecode_tex *tex; uint32_t literal[4]; unsigned nliteral; unsigned addr; @@ -1638,9 +1638,9 @@ int r600_bc_build(struct r600_bc *bc) LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; if (bc->chip_class >= EVERGREEN) - r = eg_bc_cf_build(bc, cf); + r = eg_bytecode_cf_build(bc, cf); else - r = r600_bc_cf_build(bc, cf); + r = r600_bytecode_cf_build(bc, cf); if (r) return r; switch (cf->inst) { @@ -1651,18 +1651,18 @@ int r600_bc_build(struct r600_bc *bc) nliteral = 0; memset(literal, 0, sizeof(literal)); LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { - r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); if (r) return r; - r600_bc_alu_adjust_literals(bc, alu, literal, nliteral); + r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral); switch(bc->chip_class) { case R600: - r = r600_bc_alu_build(bc, alu, addr); + r = r600_bytecode_alu_build(bc, alu, addr); break; case R700: case EVERGREEN: /* eg alu is same encoding as r700 */ case CAYMAN: /* eg alu is same encoding as r700 */ - r = r700_bc_alu_build(bc, alu, addr); + r = r700_bytecode_alu_build(bc, alu, addr); break; default: R600_ERR("unknown chip class %d.\n", bc->chip_class); @@ -1683,7 +1683,7 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - r = r600_bc_vtx_build(bc, vtx, addr); + r = r600_bytecode_vtx_build(bc, vtx, addr); if (r) return r; addr += 4; @@ -1692,14 +1692,14 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_TEX: if (bc->chip_class == CAYMAN) { LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - r = r600_bc_vtx_build(bc, vtx, addr); + r = r600_bytecode_vtx_build(bc, vtx, addr); if (r) return r; addr += 4; } } LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { - r = 
r600_bc_tex_build(bc, tex, addr); + r = r600_bytecode_tex_build(bc, tex, addr); if (r) return r; addr += 4; @@ -1728,17 +1728,17 @@ int r600_bc_build(struct r600_bc *bc) return 0; } -void r600_bc_clear(struct r600_bc *bc) +void r600_bytecode_clear(struct r600_bytecode *bc) { - struct r600_bc_cf *cf = NULL, *next_cf; + struct r600_bytecode_cf *cf = NULL, *next_cf; free(bc->bytecode); bc->bytecode = NULL; LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { - struct r600_bc_alu *alu = NULL, *next_alu; - struct r600_bc_tex *tex = NULL, *next_tex; - struct r600_bc_tex *vtx = NULL, *next_vtx; + struct r600_bytecode_alu *alu = NULL, *next_alu; + struct r600_bytecode_tex *tex = NULL, *next_tex; + struct r600_bytecode_tex *vtx = NULL, *next_vtx; LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { free(alu); @@ -1764,12 +1764,12 @@ void r600_bc_clear(struct r600_bc *bc) LIST_INITHEAD(&cf->list); } -void r600_bc_dump(struct r600_bc *bc) +void r600_bytecode_dump(struct r600_bytecode *bc) { - struct r600_bc_cf *cf = NULL; - struct r600_bc_alu *alu = NULL; - struct r600_bc_vtx *vtx = NULL; - struct r600_bc_tex *tex = NULL; + struct r600_bytecode_cf *cf = NULL; + struct r600_bytecode_alu *alu = NULL; + struct r600_bytecode_vtx *vtx = NULL; + struct r600_bytecode_tex *tex = NULL; unsigned i, id; uint32_t literal[4]; @@ -1868,7 +1868,7 @@ void r600_bc_dump(struct r600_bc *bc) id = cf->addr; nliteral = 0; LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { - r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel); @@ -2122,8 +2122,8 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru { static int dump_shaders = -1; - struct r600_bc bc; - struct r600_bc_vtx vtx; + struct r600_bytecode bc; + struct r600_bytecode_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct 
util_format_description *desc; unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160; @@ -2144,11 +2144,11 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } memset(&bc, 0, sizeof(bc)); - r600_bc_init(&bc, rctx->chip_class); + r600_bytecode_init(&bc, rctx->chip_class); for (i = 0; i < ve->count; i++) { if (elements[i].instance_divisor > 1) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(alu)); alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); @@ -2163,8 +2163,8 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(&bc, &alu))) { - r600_bc_clear(&bc); + if ((r = r600_bytecode_add_alu(&bc, &alu))) { + r600_bytecode_clear(&bc); return r; } } @@ -2175,7 +2175,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp, &endian); desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { - r600_bc_clear(&bc); + r600_bytecode_clear(&bc); R600_ERR("unknown format %d\n", ve->elements[i].src_format); return -EINVAL; } @@ -2200,16 +2200,16 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru vtx.offset = elements[i].src_offset; vtx.endian = endian; - if ((r = r600_bc_add_vtx(&bc, &vtx))) { - r600_bc_clear(&bc); + if ((r = r600_bytecode_add_vtx(&bc, &vtx))) { + r600_bytecode_clear(&bc); return r; } } - r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); + r600_bytecode_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); - if ((r = r600_bc_build(&bc))) { - r600_bc_clear(&bc); + if ((r = r600_bytecode_build(&bc))) { + r600_bytecode_clear(&bc); return r; } @@ -2218,7 +2218,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru if (dump_shaders) { fprintf(stderr, 
"--------------------------------------------------------------\n"); - r600_bc_dump(&bc); + r600_bytecode_dump(&bc); fprintf(stderr, "______________________________________________________________\n"); } @@ -2227,13 +2227,13 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); if (ve->fetch_shader == NULL) { - r600_bc_clear(&bc); + r600_bytecode_clear(&bc); return -ENOMEM; } bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, rctx->ctx.cs, PIPE_TRANSFER_WRITE); if (bytecode == NULL) { - r600_bc_clear(&bc); + r600_bytecode_clear(&bc); r600_bo_reference(&ve->fetch_shader, NULL); return -ENOMEM; } @@ -2247,7 +2247,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } r600_bo_unmap(rctx->radeon, ve->fetch_shader); - r600_bc_clear(&bc); + r600_bytecode_clear(&bc); if (rctx->chip_class >= EVERGREEN) evergreen_fetch_shader(&rctx->context, ve); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 5dec95acf1d..61caa4b915e 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -26,7 +26,7 @@ struct r600_vertex_element; struct r600_pipe_context; -struct r600_bc_alu_src { +struct r600_bytecode_alu_src { unsigned sel; unsigned chan; unsigned neg; @@ -35,7 +35,7 @@ struct r600_bc_alu_src { uint32_t value; }; -struct r600_bc_alu_dst { +struct r600_bytecode_alu_dst { unsigned sel; unsigned chan; unsigned clamp; @@ -43,10 +43,10 @@ struct r600_bc_alu_dst { unsigned rel; }; -struct r600_bc_alu { +struct r600_bytecode_alu { struct list_head list; - struct r600_bc_alu_src src[3]; - struct r600_bc_alu_dst dst; + struct r600_bytecode_alu_src src[3]; + struct r600_bytecode_alu_dst dst; unsigned inst; unsigned last; unsigned is_op3; @@ -56,7 +56,7 @@ struct r600_bc_alu { 
unsigned omod; }; -struct r600_bc_tex { +struct r600_bytecode_tex { struct list_head list; unsigned inst; unsigned resource_id; @@ -83,7 +83,7 @@ struct r600_bc_tex { unsigned src_sel_w; }; -struct r600_bc_vtx { +struct r600_bytecode_vtx { struct list_head list; unsigned inst; unsigned fetch_type; @@ -105,7 +105,7 @@ struct r600_bc_vtx { unsigned endian; }; -struct r600_bc_output { +struct r600_bytecode_output { unsigned array_base; unsigned type; unsigned end_of_program; @@ -120,13 +120,13 @@ struct r600_bc_output { unsigned barrier; }; -struct r600_bc_kcache { +struct r600_bytecode_kcache { unsigned bank; unsigned mode; unsigned addr; }; -struct r600_bc_cf { +struct r600_bytecode_cf { struct list_head list; unsigned inst; unsigned addr; @@ -135,15 +135,15 @@ struct r600_bc_cf { unsigned cond; unsigned pop_count; unsigned cf_addr; /* control flow addr */ - struct r600_bc_kcache kcache[2]; + struct r600_bytecode_kcache kcache[2]; unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; struct list_head vtx; - struct r600_bc_output output; - struct r600_bc_alu *curr_bs_head; - struct r600_bc_alu *prev_bs_head; - struct r600_bc_alu *prev2_bs_head; + struct r600_bytecode_output output; + struct r600_bytecode_alu *curr_bs_head; + struct r600_bytecode_alu *prev_bs_head; + struct r600_bytecode_alu *prev2_bs_head; }; #define FC_NONE 0 @@ -155,8 +155,8 @@ struct r600_bc_cf { struct r600_cf_stack_entry { int type; - struct r600_bc_cf *start; - struct r600_bc_cf **mid; /* used to store the else point */ + struct r600_bytecode_cf *start; + struct r600_bytecode_cf **mid; /* used to store the else point */ int num_mid; }; @@ -168,11 +168,11 @@ struct r600_cf_callstack { int max; }; -struct r600_bc { +struct r600_bytecode { enum chip_class chip_class; int type; struct list_head cf; - struct r600_bc_cf *cf_last; + struct r600_bytecode_cf *cf_last; unsigned ndw; unsigned ncf; unsigned ngpr; @@ -187,27 +187,27 @@ struct r600_bc { }; /* eg_asm.c */ -int 
eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); +int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf); /* r600_asm.c */ -void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class); -void r600_bc_clear(struct r600_bc *bc); -int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); -int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); -int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); -int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); -int r600_bc_build(struct r600_bc *bc); -int r600_bc_add_cfinst(struct r600_bc *bc, int inst); -int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); -void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); -void r600_bc_dump(struct r600_bc *bc); +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class); +void r600_bytecode_clear(struct r600_bytecode *bc); +int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu); +int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx); +int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex); +int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output); +int r600_bytecode_build(struct r600_bytecode *bc); +int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst); +int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type); +void r600_bytecode_special_constants(u32 value, unsigned *sel, unsigned *neg); +void r600_bytecode_dump(struct r600_bytecode *bc); -int cm_bc_add_cf_end(struct r600_bc *bc); +int cm_bytecode_add_cf_end(struct r600_bytecode *bc); int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); /* r700_asm.c */ -void r700_bc_cf_vtx_build(uint32_t *bytecode, 
const struct r600_bc_cf *cf); -int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); +void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf); +int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f86804eadcf..406e87bdb00 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -140,13 +140,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s R600_ERR("translation from TGSI failed !\n"); return r; } - r = r600_bc_build(&shader->shader.bc); + r = r600_bytecode_build(&shader->shader.bc); if (r) { R600_ERR("building bytecode failed !\n"); return r; } if (dump_shaders) { - r600_bc_dump(&shader->shader.bc); + r600_bytecode_dump(&shader->shader.bc); fprintf(stderr, "______________________________________________________________\n"); } return r600_pipe_shader(ctx, shader); @@ -155,7 +155,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) { r600_bo_reference(&shader->bo, NULL); - r600_bc_clear(&shader->shader.bc); + r600_bytecode_clear(&shader->shader.bc); memset(&shader->shader,0,sizeof(struct r600_shader)); } @@ -183,7 +183,7 @@ struct r600_shader_ctx { unsigned temp_reg; unsigned ar_reg; struct r600_shader_tgsi_instruction *inst_info; - struct r600_bc *bc; + struct r600_bytecode *bc; struct r600_shader *shader; struct r600_shader_src src[4]; u32 *literals; @@ -244,7 +244,7 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) { int i, r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int gpr = 0, base_chan = 0; int ij_index = 0; @@ -270,7 +270,7 @@ static int 
evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) base_chan = (2 * (ij_index % 2)) + 1; for (i = 0; i < 8; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); if (i < 4) alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; @@ -292,7 +292,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) alu.bank_swizzle_force = SQ_ALU_VEC_210; if ((i % 4) == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -345,8 +345,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_SYSTEM_VALUE: if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { - struct r600_bc_alu alu; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); alu.src[0].sel = 0; @@ -357,7 +357,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; break; } @@ -440,7 +440,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) return; } @@ -464,12 +464,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx, static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) { - struct r600_bc_vtx vtx; + struct r600_bytecode_vtx vtx; unsigned int ar_reg; int r; if (offset) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(alu)); @@ -483,7 +483,7 @@ static int 
tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; ar_reg = dst_reg; @@ -506,7 +506,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ vtx.endian = r600_endian_swap(32); - if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) return r; return 0; @@ -515,7 +515,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset static int tgsi_split_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, k, nconst, r; for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -540,7 +540,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) } else if (j > 0) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->src[i].sel; alu.src[0].chan = k; @@ -550,7 +550,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (k == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -566,7 +566,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, k, nliteral, r; for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -578,7 +578,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) if (j > 0 && ctx->src[i].sel == 
V_SQ_ALU_SRC_LITERAL) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->src[i].sel; alu.src[0].chan = k; @@ -588,7 +588,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (k == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -606,14 +606,14 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; struct r600_shader_ctx ctx; - struct r600_bc_output output[32]; + struct r600_bytecode_output output[32]; unsigned output_done, noutput; unsigned opcode; int i, j, r = 0, pos0; ctx.bc = &shader->bc; ctx.shader = shader; - r600_bc_init(ctx.bc, rctx->chip_class); + r600_bytecode_init(ctx.bc, rctx->chip_class); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -653,9 +653,9 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; if (ctx.bc->chip_class >= EVERGREEN) { - r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); + r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } else { - r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); + r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } } if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { @@ -746,8 +746,8 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi int j; for (j = 0; j < 4; j++) { - struct r600_bc_alu alu; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); /* MOV_SAT 
R, R */ alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -761,7 +761,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi if (j == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx.bc, &alu); + r = r600_bytecode_add_alu(ctx.bc, &alu); if (r) return r; } @@ -772,7 +772,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi /* export output */ j = 0; for (i = 0, pos0 = 0; i < noutput; i++) { - memset(&output[i], 0, sizeof(struct r600_bc_output)); + memset(&output[i], 0, sizeof(struct r600_bytecode_output)); output[i + j].gpr = shader->output[i].gpr; output[i + j].elem_size = 3; output[i + j].swizzle_x = 0; @@ -805,7 +805,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { for (j = 1; j < shader->nr_cbufs; j++) { - memset(&output[i + j], 0, sizeof(struct r600_bc_output)); + memset(&output[i + j], 0, sizeof(struct r600_bytecode_output)); output[i + j].gpr = shader->output[i].gpr; output[i + j].elem_size = 3; output[i + j].swizzle_x = 0; @@ -854,7 +854,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } } if (!pos0) { - memset(&output[i], 0, sizeof(struct r600_bc_output)); + memset(&output[i], 0, sizeof(struct r600_bytecode_output)); output[i].gpr = 0; output[i].elem_size = 3; output[i].swizzle_x = 0; @@ -871,7 +871,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } /* add fake pixel export */ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { - memset(&output[0], 0, sizeof(struct r600_bc_output)); + memset(&output[0], 0, sizeof(struct r600_bytecode_output)); output[0].gpr = 0; output[0].elem_size = 3; output[0].swizzle_x = 7; @@ -899,13 +899,13 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } /* add output to bytecode */ 
for (i = 0; i < noutput; i++) { - r = r600_bc_add_output(ctx.bc, &output[i]); + r = r600_bytecode_add_output(ctx.bc, &output[i]); if (r) goto out_err; } /* add program end */ if (ctx.bc->chip_class == CAYMAN) - cm_bc_add_cf_end(ctx.bc); + cm_bytecode_add_cf_end(ctx.bc); free(ctx.literals); tgsi_parse_free(&ctx.parse); @@ -928,7 +928,7 @@ static int tgsi_end(struct r600_shader_ctx *ctx) return 0; } -static void r600_bc_src(struct r600_bc_alu_src *bc_src, +static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, const struct r600_shader_src *shader_src, unsigned chan) { @@ -940,13 +940,13 @@ static void r600_bc_src(struct r600_bc_alu_src *bc_src, bc_src->value = shader_src->value[bc_src->chan]; } -static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src) +static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) { bc_src->abs = 1; bc_src->neg = 0; } -static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) +static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) { bc_src->neg = !bc_src->neg; } @@ -954,7 +954,7 @@ static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) static void tgsi_dst(struct r600_shader_ctx *ctx, const struct tgsi_full_dst_register *tgsi_dst, unsigned swizzle, - struct r600_bc_alu_dst *r600_dst) + struct r600_bytecode_alu_dst *r600_dst) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -984,7 +984,7 @@ static int tgsi_last_instruction(unsigned writemask) static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -992,25 +992,25 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], i); + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } } else { - r600_bc_src(&alu.src[0], &ctx->src[1], i); - r600_bc_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[1], i); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_SUB: - r600_bc_src_toggle_neg(&alu.src[1]); + r600_bytecode_src_toggle_neg(&alu.src[1]); break; case TGSI_OPCODE_ABS: - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src_set_abs(&alu.src[0]); break; default: break; @@ -1018,7 +1018,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1039,21 +1039,21 @@ static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int i, j, r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; for (i = 0 ; i < last_slot; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], 0); + r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == last_slot - 1) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1072,9 +1072,9 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) static float neg_pi = -3.1415926535; int r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1082,7 +1082,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; @@ -1090,11 +1090,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_0_5; alu.src[2].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); alu.dst.chan = 0; @@ -1104,11 +1104,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1134,7 +1134,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) } alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; return 0; @@ -1143,7 +1143,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) static int cayman_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; int i, r; @@ -1153,7 +1153,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx) for (i = 0; i < last_slot; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = i; @@ -1164,7 +1164,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; if (i == last_slot - 1) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1174,7 +1174,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx) static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -1182,7 +1182,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = 0; alu.dst.sel = ctx->temp_reg; @@ -1191,7 +1191,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -1200,14 +1200,14 @@ static int 
tgsi_trig(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->temp_reg; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == lasti) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1217,7 +1217,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; /* We'll only need the trig stuff if we are going to write to the @@ -1233,7 +1233,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1245,19 +1245,19 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1267,7 +1267,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { - memset(&alu, 0, 
sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 1) @@ -1278,19 +1278,19 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1298,7 +1298,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.z = 0.0; */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -1309,14 +1309,14 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } /* dst.w = 1.0; */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -1327,7 +1327,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1337,11 +1337,11 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) static int tgsi_kill(struct r600_shader_ctx *ctx) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = i; @@ -1352,12 +1352,12 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { - r600_bc_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } if (i == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1371,13 +1371,13 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; /* tmp.x = max(src.y, 0.0) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 1; @@ -1386,7 +1386,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -1399,7 +1399,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* tmp.z = log(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1411,13 +1411,13 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) } else alu.dst.write = 0; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { /* tmp.z = log(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1425,7 +1425,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.dst.chan = 2; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1434,25 +1434,25 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) sel = alu.dst.sel; /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); alu.src[0].sel = sel; alu.src[0].chan = chan; - r600_bc_src(&alu.src[1], &ctx->src[0], 3); - r600_bc_src(&alu.src[2], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); + r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; alu.is_op3 = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* dst.z = exp(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1462,56 +1462,56 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.last = 1; } else alu.dst.write = 0; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { /* dst.z = exp(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } /* dst.x, 
<- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* dst.y = max(src.x, 0.0) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* dst.w, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -1521,10 +1521,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) static int tgsi_rsq(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); /* FIXME: * For state trackers other than OpenGL, we'll want to use @@ -1533,13 +1533,13 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r600_bc_src(&alu.src[i], &ctx->src[i], 0); - r600_bc_src_set_abs(&alu.src[i]); + r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); + r600_bytecode_src_set_abs(&alu.src[i]); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* replicate result */ @@ -1549,11 +1549,11 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.src[0].sel = ctx->temp_reg; alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.dst.chan = i; @@ -1561,7 +1561,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1571,18 +1571,18 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r600_bc_src(&alu.src[i], &ctx->src[i], 0); + r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = 
r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* replicate result */ @@ -1593,38 +1593,38 @@ static int cayman_pow(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int i, r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } /* b * LOG2(a) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &ctx->src[1], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; for (i = 0; i < last_slot; i++) { /* POW(a,b) = EXP2(b * LOG2(a))*/ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; @@ -1632,7 +1632,7 @@ static int cayman_pow(struct r600_shader_ctx *ctx) alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == last_slot - 1) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1641,38 +1641,38 @@ static int cayman_pow(struct r600_shader_ctx *ctx) static int tgsi_pow(struct r600_shader_ctx *ctx) { - struct r600_bc_alu alu; + struct 
r600_bytecode_alu alu; int r; /* LOG2(a) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* b * LOG2(a) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &ctx->src[1], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* POW(a,b) = EXP2(b * LOG2(a))*/ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; return tgsi_helper_tempx_replicate(ctx); @@ -1681,32 +1681,32 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) static int tgsi_ssg(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; /* tmp = (src > 0 ? 
1 : src) */ for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); alu.src[1].sel = V_SQ_ALU_SRC_1; - r600_bc_src(&alu.src[2], &ctx->src[0], i); + r600_bytecode_src(&alu.src[2], &ctx->src[0], i); if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } /* dst = (-tmp > 0 ? -1 : tmp) */ for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1723,7 +1723,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1732,11 +1732,11 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); alu.dst.chan = i; @@ -1749,7 +1749,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru if (i == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1759,7 +1759,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction 
*inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -1767,10 +1767,10 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], i); + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1780,7 +1780,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1790,14 +1790,14 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], i); + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1830,7 +1830,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) if (i == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1857,8 +1857,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) { static float one_point_five = 1.5f; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_tex tex; - struct r600_bc_alu alu; + struct r600_bytecode_tex tex; + struct r600_bytecode_alu alu; 
unsigned src_gpr; int r, i, j; int opcode; @@ -1876,7 +1876,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (i = 1; i < 3; i++) { /* set gradients h/v */ - memset(&tex, 0, sizeof(struct r600_bc_tex)); + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H : SQ_TEX_INST_SET_GRADIENTS_V; tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); @@ -1890,15 +1890,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_w = 3; for (j = 0; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r600_bc_src(&alu.src[0], &ctx->src[i], j); + r600_bytecode_src(&alu.src[0], &ctx->src[i], j); alu.dst.sel = tex.src_gpr; alu.dst.chan = j; if (j == 3) alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1919,7 +1919,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.coord_type_z = 1; tex.coord_type_w = 1; } - r = r600_bc_add_tex(ctx->bc, &tex); + r = r600_bytecode_add_tex(ctx->bc, &tex); if (r) return r; } @@ -1929,9 +1929,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { out_chan = 2; for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1939,40 +1939,40 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.last = 1; if (out_chan == i) alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { out_chan = 3; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst 
= CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = out_chan; alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = out_chan; - r600_bc_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; @@ -1980,7 +1980,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.chan = 3; alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; src_loaded = TRUE; @@ -1993,16 +1993,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); - r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); - r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = 
r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2010,7 +2010,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* tmp1.z = RCP_e(|tmp1.z|) */ if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 2; @@ -2021,12 +2021,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 2; @@ -2035,7 +2035,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.chan = 2; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2044,7 +2044,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x * muladd has no writemask, have to use another temp */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -2061,11 +2061,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.chan = 0; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -2083,7 +2083,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = 
r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -2093,15 +2093,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (src_requires_loading && !src_loaded) { for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2124,7 +2124,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } } - memset(&tex, 0, sizeof(struct r600_bc_tex)); + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); tex.inst = opcode; tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); @@ -2171,7 +2171,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) tex.src_sel_w = tex.src_sel_z; - r = r600_bc_add_tex(ctx->bc, &tex); + r = r600_bytecode_add_tex(ctx->bc, &tex); if (r) return r; @@ -2182,7 +2182,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; int r; @@ -2193,17 +2193,17 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); - r600_bc_src(&alu.src[0], &ctx->src[1], i); - r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bytecode_src(&alu.src[0], &ctx->src[1], i); + 
r600_bytecode_src(&alu.src[1], &ctx->src[2], i); alu.omod = 3; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2215,19 +2215,19 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - r600_bc_src(&alu.src[1], &ctx->src[0], i); - r600_bc_src_toggle_neg(&alu.src[1]); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src_toggle_neg(&alu.src[1]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { alu.last = 1; } alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2237,18 +2237,18 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bytecode_src(&alu.src[1], &ctx->src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { alu.last = 1; } alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2258,11 +2258,11 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; - r600_bc_src(&alu.src[0], &ctx->src[0], i); - 
r600_bc_src(&alu.src[1], &ctx->src[1], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; @@ -2271,7 +2271,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2281,7 +2281,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) static int tgsi_cmp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -2289,18 +2289,18 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); - r600_bc_src(&alu.src[0], &ctx->src[0], i); - r600_bc_src(&alu.src[1], &ctx->src[2], i); - r600_bc_src(&alu.src[2], &ctx->src[1], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[2], i); + r600_bytecode_src(&alu.src[2], &ctx->src[1], i); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; if (i == lasti) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2312,7 +2312,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; static const unsigned int src0_swizzle[] = {2, 0, 1}; static const unsigned int src1_swizzle[] = {1, 2, 0}; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; uint32_t use_temp = 0; int i, r; @@ -2320,11 +2320,11 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) use_temp = 1; for (i = 0; i < 4; 
i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); if (i < 3) { - r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); - r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; @@ -2338,18 +2338,18 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); if (i < 3) { - r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); - r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); + r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; @@ -2370,7 +2370,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) alu.is_op3 = 1; if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2382,22 +2382,22 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; int i; /* result.x = 2^floor(src); */ if (inst->Dst[0].Register.WriteMask & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], 
&ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -2413,7 +2413,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2426,7 +2426,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.dst.chan = 0; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2434,10 +2434,10 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.y = tmp - floor(tmp); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; #if 0 @@ -2450,7 +2450,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2459,9 +2459,9 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2470,14 +2470,14 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct 
r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2485,7 +2485,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2493,7 +2493,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.w = 1.0;*/ if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; @@ -2503,7 +2503,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.dst.chan = 3; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2513,7 +2513,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) static int tgsi_log(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; int i; @@ -2521,11 +2521,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & 1) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2533,23 +2533,23 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, 
&alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2563,7 +2563,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2573,11 +2573,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2586,28 +2586,28 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = 
r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); alu.src[0].sel = ctx->temp_reg; @@ -2618,13 +2618,13 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2636,12 +2636,12 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2651,14 +2651,14 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2670,12 +2670,12 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2685,17 +2685,17 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2705,7 +2705,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2714,11 +2714,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; if (i == 2) @@ -2727,23 +2727,23 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + 
r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.dst.chan = 2; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2751,7 +2751,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.w = 1.0; */ if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; @@ -2762,7 +2762,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2773,10 +2773,10 @@ static int tgsi_log(struct r600_shader_ctx *ctx) static int tgsi_eg_arl(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: @@ -2790,11 +2790,11 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -2804,12 +2804,12 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) * between ARL and AR usage. The easy way to do that is to remove * the MOVA here, and load it for the first AR access after ar_reg * has been modified in each clause. 
*/ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; alu.src[0].sel = ctx->ar_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; return 0; @@ -2818,19 +2818,19 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) { /* TODO from r600c, ar values don't persist between clauses */ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: memset(&alu, 0, sizeof(alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; memset(&alu, 0, sizeof(alu)); @@ -2840,18 +2840,18 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; break; case TGSI_OPCODE_ARR: memset(&alu, 0, sizeof(alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; break; default: @@ -2864,7 +2864,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->ar_reg; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; ctx->bc->cf_last->r6xx_uses_waterfall = 1; @@ -2874,11 +2874,11 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) static int 
tgsi_opdst(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r = 0; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2886,17 +2886,17 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { - r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); } if (i == 0 || i == 2) { alu.src[1].sel = V_SQ_ALU_SRC_1; } else { - r600_bc_src(&alu.src[1], &ctx->src[1], i); + r600_bytecode_src(&alu.src[1], &ctx->src[1], i); } if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2905,10 +2905,10 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = opcode; alu.predicate = 1; @@ -2916,13 +2916,13 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.dst.write = 1; alu.dst.chan = 0; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; alu.last = 1; - r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); + r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); if (r) return r; return 0; @@ -2953,7 +2953,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops) } if (force_pop) { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); + 
r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); ctx->bc->cf_last->pop_count = pops; ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; } @@ -3024,8 +3024,8 @@ static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) { struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; - sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, - sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); + sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, + sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); sp->mid[sp->num_mid] = ctx->bc->cf_last; sp->num_mid++; } @@ -3053,14 +3053,14 @@ static void fc_poplevel(struct r600_shader_ctx *ctx) #if 0 static int emit_return(struct r600_shader_ctx *ctx) { - r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); + r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); return 0; } static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) { - r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); + r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); ctx->bc->cf_last->pop_count = pops; /* TODO work out offset */ return 0; @@ -3089,7 +3089,7 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) { emit_testflag(ctx); - r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); + r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, fc_sp); @@ -3102,7 +3102,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx) { emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); fc_pushlevel(ctx, FC_IF); @@ -3112,7 +3112,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx) static int tgsi_else(struct r600_shader_ctx *ctx) { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); + 
r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, ctx->bc->fc_sp); @@ -3142,7 +3142,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx) static int tgsi_bgnloop(struct r600_shader_ctx *ctx) { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); fc_pushlevel(ctx, FC_LOOP); @@ -3155,7 +3155,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx) { int i; - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { R600_ERR("loop/endloop in shader code are not paired.\n"); @@ -3195,7 +3195,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) return -EINVAL; } - r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); + r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, fscp); diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 600c3e2f540..ada369ade68 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -37,7 +37,7 @@ struct r600_shader_io { struct r600_shader { unsigned processor_type; - struct r600_bc bc; + struct r600_bytecode bc; unsigned ninput; unsigned noutput; unsigned npos; diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index b3c7d1494fc..74efe226530 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -26,7 +26,7 @@ #include "r600_asm.h" #include "r700_sq.h" -void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) { unsigned count = (cf->ndw / 4) - 1; 
*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); @@ -36,7 +36,7 @@ void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) S_SQ_CF_WORD1_COUNT_3(count >> 3); } -int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) { bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | From 5e7713caa9d601e59b600218a3b867db0f331deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 16 Aug 2011 18:48:11 +0200 Subject: [PATCH 349/600] st/dri: remove an unused-but-set variable --- src/gallium/state_trackers/dri/drm/dri2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 53638da9888..cf476056f41 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -46,7 +46,6 @@ static void dri2_flush_drawable(__DRIdrawable *draw) { - struct dri_drawable *drawable = dri_drawable(draw); struct dri_context *ctx = dri_get_current(draw->driScreenPriv); if (ctx) From 9e8f556b199a662c5525b9d03f52a067244fa602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 16 Aug 2011 19:06:55 +0200 Subject: [PATCH 350/600] softpipe: fix an obvious copy-paste error in get_query_result Reviewed-by: Brian Paul --- src/gallium/drivers/softpipe/sp_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 4ae69c1c2bd..88f42572f19 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -157,7 +157,7 @@ softpipe_get_query_result(struct pipe_context *pipe, /*os_get_time is in microseconds*/ td.frequency = 1000000; td.disjoint = FALSE; - memcpy(vresult, &sq->so, + memcpy(vresult, &td, 
sizeof(struct pipe_query_data_timestamp_disjoint)); } break; From af501e2b29c7fb161671dc5b3395eee1d1b16d3f Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 12 Aug 2011 10:20:34 -0700 Subject: [PATCH 351/600] glsl: Fix type error when lowering integer divisions This patch fixes a bug when lowering an integer division: x/y to a multiplication by a reciprocal: int(float(x)*reciprocal(float(y))) If x was a plain int and y was an ivecN, the lowering pass incorrectly assigned the type of the product to be float, when in fact it should be vecN. This caused mesa to abort with an IR validation error. Fixes piglit tests {fs,vs}-op-div-int-ivec{2,3,4}. Reviewed-by: Kenneth Graunke --- src/glsl/lower_instructions.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 806f8639959..23aa19bde6f 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -166,6 +166,10 @@ lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir) else op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL); + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->type->vector_elements, + ir->type->matrix_columns); + op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) { From 11e4ea0010c3a756cfdaf427c14e104c9a11a645 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Aug 2011 13:05:26 -0600 Subject: [PATCH 352/600] mesa: ChooseTextureFormat() returns gl_format, not GLuint --- src/mesa/main/dd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index e0c5844e193..b5ed9a40c70 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -194,7 +194,7 @@ struct dd_function_table { * cases, srcFormat and srcType can be GL_NONE. * Called by glTexImage(), etc. 
*/ - GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, + gl_format (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, GLenum srcFormat, GLenum srcType ); /** From c1f00731fd48dde68b67f157c27eb20982e82193 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 3 May 2011 15:27:38 -0700 Subject: [PATCH 353/600] i965: Generate driver-specific IR for non-fragment shaders as well. This will be used by the new vertex shader backend. The scalarizing passes are skipped for non-fragment, since vertex and geometry threads are based on vec4s. --- src/mesa/drivers/dri/i965/brw_shader.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9471883fb2b..f4005f80055 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -75,10 +75,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { struct brw_context *brw = brw_context(ctx); struct intel_context *intel = &brw->intel; + unsigned int stage; + + for (stage = 0; stage < ARRAY_SIZE(prog->_LinkedShaders); stage++) { + struct brw_shader *shader = + (struct brw_shader *)prog->_LinkedShaders[stage]; + + if (!shader) + continue; - struct brw_shader *shader = - (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - if (shader != NULL) { void *mem_ctx = ralloc_context(NULL); bool progress; @@ -116,8 +121,10 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) do { progress = false; - brw_do_channel_expressions(shader->ir); - brw_do_vector_splitting(shader->ir); + if (stage == MESA_SHADER_FRAGMENT) { + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); + } progress = do_lower_jumps(shader->ir, true, true, true, /* main return */ From 6034b9a5124475d300d0678bd2fb6160865fa972 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 3 May 2011 
10:55:50 -0700 Subject: [PATCH 354/600] i965: Create a shared enum for hardware and compiler-internal opcodes. This should make gdbing more pleasant, and it might be used in sharing part of the codegen between the VS and FS backends. --- src/mesa/drivers/dri/i965/brw_defines.h | 134 +++++++++++------- src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +- src/mesa/drivers/dri/i965/brw_fs.h | 56 +++----- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 6 + .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 20 +-- .../dri/i965/brw_fs_schedule_instructions.cpp | 15 -- src/mesa/drivers/dri/i965/brw_shader.h | 4 + 7 files changed, 120 insertions(+), 126 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0a3027d04ad..fe5d29c4328 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -557,58 +557,88 @@ #define BRW_WE_ALL 1 /** @} */ -#define BRW_OPCODE_MOV 1 -#define BRW_OPCODE_SEL 2 -#define BRW_OPCODE_NOT 4 -#define BRW_OPCODE_AND 5 -#define BRW_OPCODE_OR 6 -#define BRW_OPCODE_XOR 7 -#define BRW_OPCODE_SHR 8 -#define BRW_OPCODE_SHL 9 -#define BRW_OPCODE_RSR 10 -#define BRW_OPCODE_RSL 11 -#define BRW_OPCODE_ASR 12 -#define BRW_OPCODE_CMP 16 -#define BRW_OPCODE_CMPN 17 -#define BRW_OPCODE_JMPI 32 -#define BRW_OPCODE_IF 34 -#define BRW_OPCODE_IFF 35 -#define BRW_OPCODE_ELSE 36 -#define BRW_OPCODE_ENDIF 37 -#define BRW_OPCODE_DO 38 -#define BRW_OPCODE_WHILE 39 -#define BRW_OPCODE_BREAK 40 -#define BRW_OPCODE_CONTINUE 41 -#define BRW_OPCODE_HALT 42 -#define BRW_OPCODE_MSAVE 44 -#define BRW_OPCODE_MRESTORE 45 -#define BRW_OPCODE_PUSH 46 -#define BRW_OPCODE_POP 47 -#define BRW_OPCODE_WAIT 48 -#define BRW_OPCODE_SEND 49 -#define BRW_OPCODE_SENDC 50 -#define BRW_OPCODE_MATH 56 -#define BRW_OPCODE_ADD 64 -#define BRW_OPCODE_MUL 65 -#define BRW_OPCODE_AVG 66 -#define BRW_OPCODE_FRC 67 -#define BRW_OPCODE_RNDU 68 -#define BRW_OPCODE_RNDD 69 -#define BRW_OPCODE_RNDE 70 -#define BRW_OPCODE_RNDZ 
71 -#define BRW_OPCODE_MAC 72 -#define BRW_OPCODE_MACH 73 -#define BRW_OPCODE_LZD 74 -#define BRW_OPCODE_SAD2 80 -#define BRW_OPCODE_SADA2 81 -#define BRW_OPCODE_DP4 84 -#define BRW_OPCODE_DPH 85 -#define BRW_OPCODE_DP3 86 -#define BRW_OPCODE_DP2 87 -#define BRW_OPCODE_DPA2 88 -#define BRW_OPCODE_LINE 89 -#define BRW_OPCODE_PLN 90 -#define BRW_OPCODE_NOP 126 +enum opcode { + /* These are the actual hardware opcodes. */ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. 
+ */ + FS_OPCODE_FB_WRITE = 128, + FS_OPCODE_RCP, + FS_OPCODE_RSQ, + FS_OPCODE_SQRT, + FS_OPCODE_EXP2, + FS_OPCODE_LOG2, + FS_OPCODE_POW, + FS_OPCODE_SIN, + FS_OPCODE_COS, + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_TEX, + FS_OPCODE_TXB, + FS_OPCODE_TXD, + FS_OPCODE_TXL, + FS_OPCODE_DISCARD, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_PULL_CONSTANT_LOAD, +}; #define BRW_PREDICATE_NONE 0 #define BRW_PREDICATE_NORMAL 1 diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index cafb7092ac8..a0d75cc6f96 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -522,7 +522,7 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) { switch (opcode) { case FS_OPCODE_RCP: @@ -565,7 +565,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { int base_mrf = 2; fs_inst *inst; @@ -1149,6 +1149,9 @@ fs_visitor::propagate_constants() progress = true; } break; + + default: + break; } } @@ -1200,6 +1203,8 @@ fs_visitor::opt_algebraic() break; } + break; + default: break; } } @@ -1267,6 +1272,8 @@ fs_visitor::register_coalesce() case BRW_OPCODE_ENDIF: if_depth--; break; + default: + break; } if (loop_depth || if_depth) continue; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4ec649014de..d207ac27aa2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -25,6 +25,8 @@ * */ +#include "brw_shader.h" + extern "C" { #include @@ -55,33 +57,6 @@ enum register_file { BAD_FILE }; -enum fs_opcodes { - 
FS_OPCODE_FB_WRITE = 256, - FS_OPCODE_RCP, - FS_OPCODE_RSQ, - FS_OPCODE_SQRT, - FS_OPCODE_EXP2, - FS_OPCODE_LOG2, - FS_OPCODE_POW, - FS_OPCODE_SIN, - FS_OPCODE_COS, - FS_OPCODE_DDX, - FS_OPCODE_DDY, - FS_OPCODE_PIXEL_X, - FS_OPCODE_PIXEL_Y, - FS_OPCODE_CINTERP, - FS_OPCODE_LINTERP, - FS_OPCODE_TEX, - FS_OPCODE_TXB, - FS_OPCODE_TXD, - FS_OPCODE_TXL, - FS_OPCODE_DISCARD, - FS_OPCODE_SPILL, - FS_OPCODE_UNSPILL, - FS_OPCODE_PULL_CONSTANT_LOAD, -}; - - class fs_reg { public: /* Callers of this ralloc-based new need not call delete. It's @@ -227,13 +202,13 @@ public: init(); } - fs_inst(int opcode) + fs_inst(enum opcode opcode) { init(); this->opcode = opcode; } - fs_inst(int opcode, fs_reg dst) + fs_inst(enum opcode opcode, fs_reg dst) { init(); this->opcode = opcode; @@ -243,7 +218,7 @@ public: assert(dst.reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) { init(); this->opcode = opcode; @@ -256,7 +231,7 @@ public: assert(src[0].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { init(); this->opcode = opcode; @@ -272,7 +247,7 @@ public: assert(src[1].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) { init(); this->opcode = opcode; @@ -331,7 +306,7 @@ public: opcode == FS_OPCODE_POW); } - int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ fs_reg dst; fs_reg src[3]; bool saturate; @@ -448,27 +423,28 @@ public: fs_inst *emit(fs_inst inst); - fs_inst *emit(int opcode) + fs_inst *emit(enum opcode opcode) { return emit(fs_inst(opcode)); } - fs_inst *emit(int opcode, fs_reg dst) + fs_inst *emit(enum opcode opcode, fs_reg dst) { return emit(fs_inst(opcode, dst)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0) + fs_inst *emit(enum 
opcode opcode, fs_reg dst, fs_reg src0) { return emit(fs_inst(opcode, dst, src0)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { return emit(fs_inst(opcode, dst, src0, src1)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst *emit(enum opcode opcode, fs_reg dst, + fs_reg src0, fs_reg src1, fs_reg src2) { return emit(fs_inst(opcode, dst, src0, src1, src2)); } @@ -529,8 +505,8 @@ public: int sampler); fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, int sampler); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index e168e541bef..529df0880f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -277,6 +277,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) /* There is no sample_d_c message; comparisons are done manually */ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; + default: + assert(!"not reached"); + break; } } else { switch (inst->opcode) { @@ -317,6 +320,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) assert(inst->mlen == 7 || inst->mlen == 10); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; + default: + assert(!"not reached"); + break; } } assert(msg_type != -1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 5c9cba99ae5..7c5414ac26c 
100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -25,23 +25,6 @@ * */ -extern "C" { - -#include - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" @@ -359,6 +342,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) if (inst->dst.file == GRF) no_spill[inst->dst.reg] = true; break; + + default: + break; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index f1a88fcfa79..965a5b333a2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -25,21 +25,6 @@ * */ -extern "C" { - -#include - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 4c568a26caa..21671d1c8d6 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -21,5 +21,9 @@ * IN THE SOFTWARE. 
*/ +#include <stdint.h> + +#pragma once + int brw_type_for_base_type(const struct glsl_type *type); uint32_t brw_conditional_for_comparison(unsigned int op); From 65b5cbbcf783f6c668ab5b31a0734680dd396794 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 12:38:58 -0700 Subject: [PATCH 355/600] i965: Rename math FS_OPCODE_* to SHADER_OPCODE_*. I want to just use the same enums in the VS. --- src/mesa/drivers/dri/i965/brw_defines.h | 16 ++++----- src/mesa/drivers/dri/i965/brw_fs.cpp | 34 +++++++++---------- src/mesa/drivers/dri/i965/brw_fs.h | 16 ++++----- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 34 +++++++++---------- .../dri/i965/brw_fs_schedule_instructions.cpp | 16 ++++----- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 20 +++++------ 6 files changed, 68 insertions(+), 68 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index fe5d29c4328..da8d016da42 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -616,14 +616,14 @@ enum opcode { * instructions.
*/ FS_OPCODE_FB_WRITE = 128, - FS_OPCODE_RCP, - FS_OPCODE_RSQ, - FS_OPCODE_SQRT, - FS_OPCODE_EXP2, - FS_OPCODE_LOG2, - FS_OPCODE_POW, - FS_OPCODE_SIN, - FS_OPCODE_COS, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, FS_OPCODE_DDX, FS_OPCODE_DDY, FS_OPCODE_PIXEL_X, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a0d75cc6f96..693ef0ce31a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -143,15 +143,15 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) return 0; switch (inst->opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: return 1 * c->dispatch_width / 8; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: return 2 * c->dispatch_width / 8; case FS_OPCODE_TEX: case FS_OPCODE_TXB: @@ -525,13 +525,13 @@ fs_inst * fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) { switch (opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: break; default: assert(!"not reached: bad math opcode"); @@ -570,7 +570,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) int base_mrf = 2; fs_inst *inst; - assert(opcode == FS_OPCODE_POW); + assert(opcode == SHADER_OPCODE_POW); if (intel->gen >= 6) { /* Can't do hstride == 0 args to gen6 math, so 
expand it out. @@ -1135,7 +1135,7 @@ fs_visitor::propagate_constants() } break; - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: /* The hardware doesn't do math on immediate values * (because why are you doing that, seriously?), but * the correct answer is to just constant fold it diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index d207ac27aa2..94af0e1af16 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -296,14 +296,14 @@ public: bool is_math() { - return (opcode == FS_OPCODE_RCP || - opcode == FS_OPCODE_RSQ || - opcode == FS_OPCODE_SQRT || - opcode == FS_OPCODE_EXP2 || - opcode == FS_OPCODE_LOG2 || - opcode == FS_OPCODE_SIN || - opcode == FS_OPCODE_COS || - opcode == FS_OPCODE_POW); + return (opcode == SHADER_OPCODE_RCP || + opcode == SHADER_OPCODE_RSQ || + opcode == SHADER_OPCODE_SQRT || + opcode == SHADER_OPCODE_EXP2 || + opcode == SHADER_OPCODE_LOG2 || + opcode == SHADER_OPCODE_SIN || + opcode == SHADER_OPCODE_COS || + opcode == SHADER_OPCODE_POW); } enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 529df0880f0..285ba46bd46 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -149,28 +149,28 @@ fs_visitor::generate_math(fs_inst *inst, int op; switch (inst->opcode) { - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: op = BRW_MATH_FUNCTION_INV; break; - case FS_OPCODE_RSQ: + case SHADER_OPCODE_RSQ: op = BRW_MATH_FUNCTION_RSQ; break; - case FS_OPCODE_SQRT: + case SHADER_OPCODE_SQRT: op = BRW_MATH_FUNCTION_SQRT; break; - case FS_OPCODE_EXP2: + case SHADER_OPCODE_EXP2: op = BRW_MATH_FUNCTION_EXP; break; - case FS_OPCODE_LOG2: + case SHADER_OPCODE_LOG2: op = BRW_MATH_FUNCTION_LOG; break; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: op = BRW_MATH_FUNCTION_POW; break; - case FS_OPCODE_SIN: + case SHADER_OPCODE_SIN: op = 
BRW_MATH_FUNCTION_SIN; break; - case FS_OPCODE_COS: + case SHADER_OPCODE_COS: op = BRW_MATH_FUNCTION_COS; break; default: @@ -182,7 +182,7 @@ fs_visitor::generate_math(fs_inst *inst, if (intel->gen >= 6) { assert(inst->mlen == 0); - if (inst->opcode == FS_OPCODE_POW) { + if (inst->opcode == SHADER_OPCODE_POW) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math2(p, dst, op, src[0], src[1]); @@ -775,14 +775,14 @@ fs_visitor::generate_code() } break; - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_POW: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_POW: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: generate_math(inst, dst, src); break; case FS_OPCODE_PIXEL_X: diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index 965a5b333a2..0ea4e5c36f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -69,26 +69,26 @@ public: int math_latency = 22; switch (inst->opcode) { - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: this->latency = 1 * chans * math_latency; break; - case FS_OPCODE_RSQ: + case SHADER_OPCODE_RSQ: this->latency = 2 * chans * math_latency; break; - case FS_OPCODE_SQRT: - case FS_OPCODE_LOG2: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_LOG2: /* full precision log. partial is 2. */ this->latency = 3 * chans * math_latency; break; - case FS_OPCODE_EXP2: + case SHADER_OPCODE_EXP2: /* full precision. partial is 3, same throughput. 
*/ this->latency = 4 * chans * math_latency; break; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: this->latency = 8 * chans * math_latency; break; - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: /* minimum latency, max is 12 rounds. */ this->latency = 5 * chans * math_latency; break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2e3f9be75b4..8b4f5bbac15 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -250,14 +250,14 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_rcp: - emit_math(FS_OPCODE_RCP, this->result, op[0]); + emit_math(SHADER_OPCODE_RCP, this->result, op[0]); break; case ir_unop_exp2: - emit_math(FS_OPCODE_EXP2, this->result, op[0]); + emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); break; case ir_unop_log2: - emit_math(FS_OPCODE_LOG2, this->result, op[0]); + emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); break; case ir_unop_exp: case ir_unop_log: @@ -265,11 +265,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sin: case ir_unop_sin_reduced: - emit_math(FS_OPCODE_SIN, this->result, op[0]); + emit_math(SHADER_OPCODE_SIN, this->result, op[0]); break; case ir_unop_cos: case ir_unop_cos_reduced: - emit_math(FS_OPCODE_COS, this->result, op[0]); + emit_math(SHADER_OPCODE_COS, this->result, op[0]); break; case ir_unop_dFdx: @@ -340,11 +340,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sqrt: - emit_math(FS_OPCODE_SQRT, this->result, op[0]); + emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); break; case ir_unop_rsq: - emit_math(FS_OPCODE_RSQ, this->result, op[0]); + emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); break; case ir_unop_i2u: @@ -423,7 +423,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_pow: - emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); + emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); 
break; case ir_unop_bit_not: @@ -1694,7 +1694,7 @@ fs_visitor::emit_interpolation_setup_gen4() interp_reg(FRAG_ATTRIB_WPOS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); + emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); this->current_annotation = NULL; } @@ -1731,7 +1731,7 @@ fs_visitor::emit_interpolation_setup_gen6() this->current_annotation = "compute pos.w"; this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->wpos_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w); + emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); this->delta_x = fs_reg(brw_vec8_grf(2, 0)); this->delta_y = fs_reg(brw_vec8_grf(3, 0)); From af3c9803d818fd33139f1247a387d64b967b8992 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 2 May 2011 09:45:40 -0700 Subject: [PATCH 356/600] i965: Start adding the VS visitor and codegen. The low-level IR is a mashup of brw_fs.cpp and ir_to_mesa.cpp. It's currently controlled by the INTEL_NEW_VS=1 environment variable, and only tested for the trivial "gl_Position = gl_Vertex;" shader so far. 
--- src/mesa/drivers/dri/i965/Makefile | 5 +- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/brw_defines.h | 2 + src/mesa/drivers/dri/i965/brw_eu.h | 3 + src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 33 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 26 + src/mesa/drivers/dri/i965/brw_shader.h | 2 + src/mesa/drivers/dri/i965/brw_vec4.h | 434 +++++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 568 ++++++ .../dri/i965/brw_vec4_reg_allocate.cpp | 77 + .../drivers/dri/i965/brw_vec4_visitor.cpp | 1649 +++++++++++++++++ src/mesa/drivers/dri/i965/brw_vs.c | 12 +- src/mesa/drivers/dri/i965/brw_vs.h | 3 +- src/mesa/drivers/dri/i965/brw_vs_emit.c | 2 +- 14 files changed, 2781 insertions(+), 37 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_vec4.h create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 44f28cd9d15..45a5350a383 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -124,7 +124,10 @@ CXX_SOURCES = \ brw_fs_reg_allocate.cpp \ brw_fs_schedule_instructions.cpp \ brw_fs_vector_splitting.cpp \ - brw_shader.cpp + brw_shader.cpp \ + brw_vec4_emit.cpp \ + brw_vec4_reg_allocate.cpp \ + brw_vec4_visitor.cpp ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index cc11d06874d..7b6b64c1a5c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -529,7 +529,7 @@ struct brw_context * the CURBE, the depth buffer, and a query BO. 
*/ drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16]; - int validated_bo_count; + unsigned int validated_bo_count; } state; struct brw_cache cache; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index da8d016da42..e3823c65d1a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -638,6 +638,8 @@ enum opcode { FS_OPCODE_SPILL, FS_OPCODE_UNSPILL, FS_OPCODE_PULL_CONSTANT_LOAD, + + VS_OPCODE_URB_WRITE, }; #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 72d50eadbce..38dd99b693d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -44,6 +44,9 @@ #define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) +#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) +#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 285ba46bd46..7367ccaa7e0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -146,38 +146,7 @@ void fs_visitor::generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src) { - int op; - - switch (inst->opcode) { - case SHADER_OPCODE_RCP: - op = BRW_MATH_FUNCTION_INV; - break; - case SHADER_OPCODE_RSQ: - op = BRW_MATH_FUNCTION_RSQ; - break; - case SHADER_OPCODE_SQRT: - op = BRW_MATH_FUNCTION_SQRT; - break; - case SHADER_OPCODE_EXP2: - op = BRW_MATH_FUNCTION_EXP; - break; - case SHADER_OPCODE_LOG2: - op = BRW_MATH_FUNCTION_LOG; - break; - case SHADER_OPCODE_POW: - op = BRW_MATH_FUNCTION_POW; - break; - case SHADER_OPCODE_SIN: - op = BRW_MATH_FUNCTION_SIN; - break; - case SHADER_OPCODE_COS: - op = BRW_MATH_FUNCTION_COS; 
- break; - default: - assert(!"not reached: unknown math function"); - op = 0; - break; - } + int op = brw_math_function(inst->opcode); if (intel->gen >= 6) { assert(inst->mlen == 0); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index f4005f80055..2eeeec25cac 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -199,3 +199,29 @@ brw_conditional_for_comparison(unsigned int op) return BRW_CONDITIONAL_NZ; } } + +uint32_t +brw_math_function(enum opcode op) +{ + switch (op) { + case SHADER_OPCODE_RCP: + return BRW_MATH_FUNCTION_INV; + case SHADER_OPCODE_RSQ: + return BRW_MATH_FUNCTION_RSQ; + case SHADER_OPCODE_SQRT: + return BRW_MATH_FUNCTION_SQRT; + case SHADER_OPCODE_EXP2: + return BRW_MATH_FUNCTION_EXP; + case SHADER_OPCODE_LOG2: + return BRW_MATH_FUNCTION_LOG; + case SHADER_OPCODE_POW: + return BRW_MATH_FUNCTION_POW; + case SHADER_OPCODE_SIN: + return BRW_MATH_FUNCTION_SIN; + case SHADER_OPCODE_COS: + return BRW_MATH_FUNCTION_COS; + default: + assert(!"not reached: unknown math function"); + return 0; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 21671d1c8d6..1054d7a589e 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -22,8 +22,10 @@ */ #include +#include "brw_defines.h" #pragma once int brw_type_for_base_type(const struct glsl_type *type); uint32_t brw_conditional_for_comparison(unsigned int op); +uint32_t brw_math_function(enum opcode op); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h new file mode 100644 index 00000000000..10168fc1cb0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -0,0 +1,434 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * 
to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_VEC4_H +#define BRW_VEC4_H + +#include +#include "brw_shader.h" +#include "main/compiler.h" +#include "program/hash_table.h" + +extern "C" { +#include "brw_vs.h" +#include "brw_context.h" +#include "brw_eu.h" +}; + +#include "../glsl/ir.h" + +namespace brw { + +class dst_reg; + +/** + * Common helper for constructing swizzles. When only a subset of + * channels of a vec4 are used, we don't want to reference the other + * channels, as that will tell optimization passes that those other + * channels are used. 
+ */ +static int +swizzle_for_size(int size) +{ + int size_swizzles[4] = { + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), + }; + + assert((size >= 1) && (size <= 4)); + return size_swizzles[size - 1]; +} + +enum register_file { + ARF = BRW_ARCHITECTURE_REGISTER_FILE, + GRF = BRW_GENERAL_REGISTER_FILE, + MRF = BRW_MESSAGE_REGISTER_FILE, + IMM = BRW_IMMEDIATE_VALUE, + HW_REG, /* a struct brw_reg */ + ATTR, + UNIFORM, /* prog_data->params[hw_reg] */ + BAD_FILE +}; + +class reg +{ +public: + /** Register file: any enum register_file value (BAD_FILE when unset). */ + enum register_file file; + /** virtual register number. 0 = fixed hw reg */ + int reg; + /** Offset within the virtual register. */ + int reg_offset; + /** Register type. BRW_REGISTER_TYPE_* */ + int type; + bool sechalf; + struct brw_reg fixed_hw_reg; + int smear; /* -1, or a channel of the reg to smear to all channels. */ + + /** Value for file == IMM; the member in use is selected by type. */ + union { + int32_t i; + uint32_t u; + float f; + } imm; +}; + +class src_reg : public reg +{ +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + memset(this, 0, sizeof(*this)); + + this->file = BAD_FILE; + } + + src_reg(register_file file, int reg, const glsl_type *type) + { + init(); + + this->file = file; + this->reg = reg; + if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) + this->swizzle = swizzle_for_size(type->vector_elements); + else + this->swizzle = SWIZZLE_XYZW; + } + + /** Generic unset register constructor.
*/ + src_reg() + { + init(); + } + + src_reg(float f) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_F; + this->imm.f = f; + } + + src_reg(uint32_t u) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_UD; + this->imm.u = u; /* raw bits: get_src() emits imm.u for UD immediates */ + } + + src_reg(int32_t i) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_D; + this->imm.i = i; + } + + src_reg(class vec4_visitor *v, const struct glsl_type *type); + + explicit src_reg(dst_reg reg); + + GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */ + bool negate; + bool abs; +}; + +class dst_reg : public reg +{ +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + memset(this, 0, sizeof(*this)); + this->file = BAD_FILE; + this->writemask = WRITEMASK_XYZW; + } + + dst_reg() + { + init(); + } + + dst_reg(register_file file, int reg) + { + init(); + + this->file = file; + this->reg = reg; + } + + dst_reg(struct brw_reg reg) + { + init(); + + this->file = HW_REG; + this->fixed_hw_reg = reg; + } + + dst_reg(class vec4_visitor *v, const struct glsl_type *type); + + explicit dst_reg(src_reg reg); + + int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ +}; + +class vec4_instruction : public exec_node { +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors).
*/ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + struct brw_reg get_dst(void); + struct brw_reg get_src(int i); + + enum opcode opcode; /* BRW_OPCODE_*, SHADER_OPCODE_* or VS_OPCODE_* */ + dst_reg dst; + src_reg src[3]; + + bool saturate; + bool predicate_inverse; + uint32_t predicate; + + int conditional_mod; /**< BRW_CONDITIONAL_* */ + + int sampler; + int target; /**< MRT target. */ + bool shadow_compare; + + bool eot; + bool header_present; + int mlen; /**< SEND message length */ + int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ + + uint32_t offset; /* spill/unspill offset; URB destination offset for VS_OPCODE_URB_WRITE */ + /** @{ + * Annotation for the generated IR. One of the two can be set. + */ + ir_instruction *ir; + const char *annotation; +}; + +class vec4_visitor : public ir_visitor +{ +public: + vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, struct brw_shader *shader); + ~vec4_visitor(); + + dst_reg dst_null_f() + { + return dst_reg(brw_null_reg()); + } + + dst_reg dst_null_d() + { + return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + } + + dst_reg dst_null_cmp() + { + if (intel->gen > 4) + return dst_null_d(); + else + return dst_null_f(); + } + + struct brw_context *brw; + const struct gl_vertex_program *vp; + struct intel_context *intel; + struct gl_context *ctx; + struct brw_vs_compile *c; + struct brw_vs_prog_data *prog_data; + struct brw_compile *p; + struct brw_shader *shader; + struct gl_shader_program *prog; + void *mem_ctx; + exec_list instructions; + + char *fail_msg; + bool failed; + + /** + * GLSL IR currently being processed, which is associated with our + * driver IR instructions for debugging purposes.
+ */ + ir_instruction *base_ir; + const char *current_annotation; + + int *virtual_grf_sizes; + int virtual_grf_count; + int virtual_grf_array_size; + int first_non_payload_grf; + + dst_reg *variable_storage(ir_variable *var); + + void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); + + src_reg src_reg_for_float(float val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + src_reg result; + + /* Regs for vertex results. Generated at ir_variable visiting time + * for the ir->location's used. 
+ */ + dst_reg output_reg[VERT_RESULT_MAX]; + + struct hash_table *variable_ht; + + bool run(void); + void fail(const char *msg, ...); + + int virtual_grf_alloc(int size); + int setup_attributes(int payload_reg); + void setup_payload(); + void reg_allocate_trivial(); + void reg_allocate(); + + vec4_instruction *emit(enum opcode opcode); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2); + + /** Walks an exec_list of ir_instruction and sends it through this visitor. */ + void visit_instructions(const exec_list *list); + + void emit_bool_to_cond_code(ir_rvalue *ir); + void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); + void emit_if_gen6(ir_if *ir); + + void emit_block_move(ir_assignment *ir); + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0, src_reg src1); + + void emit_scs(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, const src_reg &src); + + void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + + int emit_vue_header_gen6(int header_mrf); + int emit_vue_header_gen4(int header_mrf); + void emit_urb_writes(void); + + GLboolean 
try_emit_sat(ir_expression *ir); + + bool process_move_condition(ir_rvalue *ir); + + void generate_code(); + void generate_vs_instruction(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg *src); + void generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_urb_write(vec4_instruction *inst); +}; + +} /* namespace brw */ + +#endif /* BRW_VEC4_H */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp new file mode 100644 index 00000000000..bdc7a79d83d --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -0,0 +1,568 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+extern "C" {
+#include "brw_eu.h"
+};
+
+using namespace brw;
+
+namespace brw {
+/**
+ * Give every vertex attribute flagged in prog_data->inputs_read a register
+ * starting at payload_reg, rewrite all ATTR-file instruction sources to the
+ * matching fixed hardware register, and set prog_data->urb_read_length.
+ *
+ * \param payload_reg  register number assigned to the first attribute read
+ * \return number of attribute registers; never less than 1
+ */
+int
+vec4_visitor::setup_attributes(int payload_reg)
+{
+   int nr_attributes;
+   int attribute_map[VERT_ATTRIB_MAX]; /* only entries for read attributes are written */
+
+   nr_attributes = 0;
+   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
+      if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
+         attribute_map[i] = payload_reg + nr_attributes;
+         nr_attributes++;
+      }
+   }
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file != ATTR)
+            continue;
+
+         inst->src[i].file = HW_REG;
+         inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0);
+         inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
+      }
+   }
+
+   /* The BSpec says we always have to read at least one thing from
+    * the VF, and it appears that the hardware wedges otherwise.
+    */
+   if (nr_attributes == 0)
+      nr_attributes = 1;
+
+   prog_data->urb_read_length = (nr_attributes + 1) / 2; /* half the count, rounded up */
+
+   return nr_attributes;
+}
+/**
+ * Lay out the thread payload: r0, then the user clip planes (if any), then
+ * the vertex attributes.  Records the curbe read length and parameter count
+ * in c->prog_data and stores the first register after the payload in
+ * first_non_payload_grf.
+ */
+void
+vec4_visitor::setup_payload(void)
+{
+   int reg = 0;
+
+   /* r0 is always reserved, as it contains the payload with the URB
+    * handles that are passed on to the URB write at the end of the
+    * thread.
+    */
+   reg++;
+
+   /* User clip planes from curbe:
+    */
+   if (c->key.nr_userclip) {
+      if (intel->gen >= 6) {
+         for (int i = 0; i < c->key.nr_userclip; i++) {
+            c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
+                                                  (i % 2) * 4), 0, 4, 1);
+         }
+         reg += ALIGN(c->key.nr_userclip, 2) / 2;
+      } else {
+         for (int i = 0; i < c->key.nr_userclip; i++) {
+            c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
+                                                  (i % 2) * 4), 0, 4, 1);
+         }
+         reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
+      }
+   }
+
+   /* FINISHME: push constants */
+   c->prog_data.curb_read_length = reg - 1;
+   c->prog_data.nr_params = 0;
+   /* XXX 0 causes a bug elsewhere... */
+   if (intel->gen < 6 && c->prog_data.nr_params == 0)
+      c->prog_data.nr_params = 4;
+
+   reg += setup_attributes(reg);
+
+   this->first_non_payload_grf = reg;
+}
+/**
+ * Translate this instruction's destination into a struct brw_reg.
+ *
+ * GRF destinations become a retyped vec8 GRF carrying the writemask,
+ * HW_REG destinations are returned as stored, and BAD_FILE yields the
+ * null register.  Any other file asserts.
+ */
+struct brw_reg
+vec4_instruction::get_dst(void)
+{
+   struct brw_reg brw_reg;
+
+   switch (dst.file) {
+   case GRF:
+      assert(dst.reg_offset == 0);
+      brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+      brw_reg = retype(brw_reg, dst.type);
+      brw_reg.dw1.bits.writemask = dst.writemask;
+      break;
+
+   case HW_REG:
+      brw_reg = dst.fixed_hw_reg;
+      break;
+
+   case BAD_FILE:
+      brw_reg = brw_null_reg();
+      break;
+
+   default:
+      assert(!"not reached");
+      brw_reg = brw_null_reg();
+      break;
+   }
+   return brw_reg;
+}
+/**
+ * Translate source operand i of this instruction into a struct brw_reg.
+ *
+ * GRF sources get their type, swizzle, abs and negate applied; IMM sources
+ * become float, signed or unsigned immediates according to their type;
+ * HW_REG sources are returned as stored; BAD_FILE yields the null register.
+ * ATTR sources are not expected here (they assert).
+ *
+ * \param i  index into src[]
+ */
+struct brw_reg
+vec4_instruction::get_src(int i)
+{
+   struct brw_reg brw_reg;
+
+   switch (src[i].file) {
+   case GRF:
+      brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
+      brw_reg = retype(brw_reg, src[i].type);
+      brw_reg.dw1.bits.swizzle = src[i].swizzle;
+      if (src[i].abs)
+         brw_reg = brw_abs(brw_reg);
+      if (src[i].negate)
+         brw_reg = negate(brw_reg);
+      break;
+
+   case IMM:
+      switch (src[i].type) {
+      case BRW_REGISTER_TYPE_F:
+         brw_reg = brw_imm_f(src[i].imm.f);
+         break;
+      case BRW_REGISTER_TYPE_D:
+         brw_reg = brw_imm_d(src[i].imm.i);
+         break;
+      case BRW_REGISTER_TYPE_UD:
+         brw_reg = brw_imm_ud(src[i].imm.u);
+         break;
+      default:
+         assert(!"not reached");
+         brw_reg = brw_null_reg();
+         break;
+      }
+      break;
+
+   case HW_REG:
+      brw_reg = src[i].fixed_hw_reg;
+      break;
+
+   case BAD_FILE:
+      /* Probably unused. */
+      brw_reg = brw_null_reg();
+      break;
+   case ATTR:
+   default:
+      assert(!"not reached");
+      brw_reg = brw_null_reg();
+      break;
+   }
+
+   return brw_reg;
+}
+/**
+ * Emit a one-operand math instruction through brw_math(), using the
+ * instruction's base_mrf, no saturation, scalar data and full precision.
+ */
+void
+vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
+                                  struct brw_reg dst,
+                                  struct brw_reg src)
+{
+   brw_math(p,
+            dst,
+            brw_math_function(inst->opcode),
+            BRW_MATH_SATURATE_NONE,
+            inst->base_mrf,
+            src,
+            BRW_MATH_DATA_SCALAR,
+            BRW_MATH_PRECISION_FULL);
+}
+/**
+ * Gen6 counterpart of generate_math1_gen4(); currently issues exactly the
+ * same brw_math() call.
+ */
+void
+vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
+                                  struct brw_reg dst,
+                                  struct brw_reg src)
+{
+   brw_math(p,
+            dst,
+            brw_math_function(inst->opcode),
+            BRW_MATH_SATURATE_NONE,
+            inst->base_mrf,
+            src,
+            BRW_MATH_DATA_SCALAR,
+            BRW_MATH_PRECISION_FULL);
+}
+/**
+ * Emit a URB write for inst, taking the message start, length, offset and
+ * end-of-thread flag from the instruction; r0 is passed as the source.
+ */
+void
+vec4_visitor::generate_urb_write(vec4_instruction *inst)
+{
+   brw_urb_WRITE(p,
+                 brw_null_reg(), /* dest */
+                 inst->base_mrf, /* starting mrf reg nr */
+                 brw_vec8_grf(0, 0), /* src */
+                 false, /* allocate */
+                 true, /* used */
+                 inst->mlen,
+                 0, /* response len */
+                 inst->eot, /* eot */
+                 inst->eot, /* writes complete */
+                 inst->offset, /* urb destination offset */
+                 BRW_URB_SWIZZLE_INTERLEAVE);
+}
+/**
+ * Generate code for the opcodes that generate_code() does not handle
+ * itself: one-operand math and URB writes.  POW is not implemented yet and
+ * any other opcode is reported through fail().
+ *
+ * \param instruction  instruction to generate
+ * \param dst          already-translated destination register
+ * \param src          already-translated source registers
+ */
+void
+vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
+                                      struct brw_reg dst,
+                                      struct brw_reg *src)
+{
+   vec4_instruction *inst = (vec4_instruction *)instruction;
+
+   switch (inst->opcode) {
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      if (intel->gen >= 6) {
+         generate_math1_gen6(inst, dst, src[0]);
+      } else {
+         generate_math1_gen4(inst, dst, src[0]);
+      }
+      break;
+
+   case SHADER_OPCODE_POW:
+      assert(!"finishme");
+      break;
+
+   case VS_OPCODE_URB_WRITE:
+      generate_urb_write(inst);
+      break;
+
+   default:
+      /* Name the opcode when it falls inside the brw_opcodes table. */
+      if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+         fail("unsupported opcode in `%s' in VS\n",
+              brw_opcodes[inst->opcode].name);
+      } else {
+         fail("Unsupported opcode %d in VS", inst->opcode);
+      }
+   }
+}
+
+bool
+vec4_visitor::run()
+{
+   /* Compile driver: visit the shader IR, emit the URB writes, lay out the
+    * payload, allocate registers and generate native code.  Returns false
+    * as soon as the failed flag is seen set.
+    *
+    * Generate VS IR for main().  (the visitor only descends into
+    * functions called "main").
+    */
+   foreach_iter(exec_list_iterator, iter, *shader->ir) {
+      ir_instruction *ir = (ir_instruction *)iter.get();
+      base_ir = ir;
+      ir->accept(this);
+   }
+
+   emit_urb_writes();
+
+   if (failed)
+      return false;
+
+   setup_payload();
+   reg_allocate();
+
+   brw_set_access_mode(p, BRW_ALIGN_16);
+
+   generate_code();
+
+   return !failed;
+}
+/**
+ * Walk the vec4 instruction list and emit one or more native instructions
+ * for each entry through the brw_* emitters.
+ *
+ * A stack of DO instructions (loop_stack) and a per-loop count of open IFs
+ * (if_depth_in_loop) are kept so that BREAK/CONTINUE can be emitted and, on
+ * hardware before gen6, patched with jump counts when the matching WHILE is
+ * reached.  With INTEL_DEBUG & DEBUG_VS set, the IR/annotation and the
+ * disassembly of each generated instruction are printed to stdout.
+ */
+void
+vec4_visitor::generate_code()
+{
+   int last_native_inst = p->nr_insn;
+   const char *last_annotation_string = NULL;
+   ir_instruction *last_annotation_ir = NULL;
+
+   int loop_stack_array_size = 16;
+   int loop_stack_depth = 0;
+   brw_instruction **loop_stack =
+      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
+   int *if_depth_in_loop =
+      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
+
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("Native code for vertex shader %d:\n", prog->Name);
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+      struct brw_reg src[3], dst;
+
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+         /* Print each IR node / annotation only once, when it changes. */
+         if (last_annotation_ir != inst->ir) {
+            last_annotation_ir = inst->ir;
+            if (last_annotation_ir) {
+               printf(" ");
+               last_annotation_ir->print();
+               printf("\n");
+            }
+         }
+         if (last_annotation_string != inst->annotation) {
+            last_annotation_string = inst->annotation;
+            if (last_annotation_string)
+               printf(" %s\n", last_annotation_string);
+         }
+      }
+
+      for (unsigned int i = 0; i < 3; i++) {
+         src[i] = inst->get_src(i);
+      }
+      dst = inst->get_dst();
+
+      brw_set_conditionalmod(p, inst->conditional_mod);
+      brw_set_predicate_control(p, inst->predicate);
+      brw_set_predicate_inverse(p, inst->predicate_inverse);
+      brw_set_saturate(p, inst->saturate);
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MOV:
+         brw_MOV(p, dst, src[0]);
+         break;
+      case BRW_OPCODE_ADD:
+         brw_ADD(p, dst, src[0], src[1]);
+         break;
+      case BRW_OPCODE_MUL:
+         brw_MUL(p, dst, src[0], src[1]);
+         break;
+
+      case BRW_OPCODE_FRC:
+         brw_FRC(p, dst, src[0]);
+         break;
+      case BRW_OPCODE_RNDD:
+         brw_RNDD(p, dst, src[0]);
+         break;
+      case BRW_OPCODE_RNDE:
+         brw_RNDE(p, dst, src[0]);
+         break;
+      case BRW_OPCODE_RNDZ:
+         brw_RNDZ(p, dst, src[0]);
+         break;
+
+      case BRW_OPCODE_AND:
+         brw_AND(p, dst, src[0], src[1]);
+         break;
+      case BRW_OPCODE_OR:
+         brw_OR(p, dst, src[0], src[1]);
+         break;
+      case BRW_OPCODE_XOR:
+         brw_XOR(p, dst, src[0], src[1]);
+         break;
+      case BRW_OPCODE_NOT:
+         brw_NOT(p, dst, src[0]);
+         break;
+      case BRW_OPCODE_ASR:
+         brw_ASR(p, dst, src[0], src[1]);
+         break;
+      case BRW_OPCODE_SHR:
+         brw_SHR(p, dst, src[0], src[1]);
+         break;
+      case BRW_OPCODE_SHL:
+         brw_SHL(p, dst, src[0], src[1]);
+         break;
+
+      case BRW_OPCODE_CMP:
+         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
+         break;
+      case BRW_OPCODE_SEL:
+         brw_SEL(p, dst, src[0], src[1]);
+         break;
+
+      case BRW_OPCODE_IF:
+         if (inst->src[0].file != BAD_FILE) {
+            /* The instruction has an embedded compare (only allowed on gen6) */
+            assert(intel->gen == 6);
+            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
+         } else {
+            brw_IF(p, BRW_EXECUTE_8);
+         }
+         if_depth_in_loop[loop_stack_depth]++;
+         break;
+
+      case BRW_OPCODE_ELSE:
+         brw_ELSE(p);
+         break;
+      case BRW_OPCODE_ENDIF:
+         brw_ENDIF(p);
+         if_depth_in_loop[loop_stack_depth]--;
+         break;
+
+      case BRW_OPCODE_DO:
+         loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+         /* Double both stacks once the new depth no longer fits. */
+         if (loop_stack_array_size <= loop_stack_depth) {
+            loop_stack_array_size *= 2;
+            loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
+                                  loop_stack_array_size);
+            if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
+                                        loop_stack_array_size);
+         }
+         if_depth_in_loop[loop_stack_depth] = 0;
+         break;
+
+      case BRW_OPCODE_BREAK:
+         brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         break;
+      case BRW_OPCODE_CONTINUE:
+         /* FINISHME: We need to write the loop instruction support still. */
+         if (intel->gen >= 6)
+            gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
+         else
+            brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         break;
+
+      case BRW_OPCODE_WHILE: {
+         struct brw_instruction *inst0, *inst1;
+         GLuint br = 1; /* jump-count scale; 2 from gen5 on */
+
+         if (intel->gen >= 5)
+            br = 2;
+
+         assert(loop_stack_depth > 0);
+         loop_stack_depth--;
+         inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+         if (intel->gen < 6) {
+            /* patch all the BREAK/CONT instructions from last BGNLOOP */
+            while (inst0 > loop_stack[loop_stack_depth]) {
+               inst0--;
+               if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+                   inst0->bits3.if_else.jump_count == 0) {
+                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+               }
+               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+                        inst0->bits3.if_else.jump_count == 0) {
+                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+               }
+            }
+         }
+      }
+         break;
+
+      default:
+         generate_vs_instruction(inst, dst, src);
+         break;
+      }
+
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+         /* Disassemble everything emitted for this vec4 instruction. */
+         for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+            if (0) {
+               printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+                      ((uint32_t *)&p->store[i])[3],
+                      ((uint32_t *)&p->store[i])[2],
+                      ((uint32_t *)&p->store[i])[1],
+                      ((uint32_t *)&p->store[i])[0]);
+            }
+            brw_disasm(stdout, &p->store[i], intel->gen);
+         }
+      }
+
+      last_native_inst = p->nr_insn;
+   }
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("\n");
+   }
+
+   ralloc_free(loop_stack);
+   ralloc_free(if_depth_in_loop);
+
+   brw_set_uip_jip(p);
+
+   /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
+    * emit issues, it doesn't get the jump distances into the output,
+    * which is often something we want to debug.  So this is here in
+    * case you're doing that.
+    */
+   if (0) {
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+         for (unsigned int i = 0; i < p->nr_insn; i++) {
+            printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+                   ((uint32_t *)&p->store[i])[3],
+                   ((uint32_t *)&p->store[i])[2],
+                   ((uint32_t *)&p->store[i])[1],
+                   ((uint32_t *)&p->store[i])[0]);
+            brw_disasm(stdout, &p->store[i], intel->gen);
+         }
+      }
+   }
+}
+
+extern "C" {
+/**
+ * C entry point: compile the context's current vertex shader program with
+ * vec4_visitor.
+ *
+ * \param c  compile state; the context is reached through c->func.brw
+ * \return false when there is no current vertex program, no linked vertex
+ *         shader, or the visitor fails; true otherwise
+ */
+bool
+brw_vs_emit(struct brw_vs_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;
+
+   if (!prog)
+      return false;
+
+   struct brw_shader *shader =
+     (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
+   if (!shader)
+      return false;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n\n");
+   }
+
+   vec4_visitor v(c, prog, shader);
+   if (!v.run()) {
+      /* FINISHME: Cleanly fail, test at link time, etc. */
+      assert(!"not reached");
+      return false;
+   }
+
+   return true;
+}
+
+} /* extern "C" */
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
new file mode 100644
index 00000000000..e7f6b28a536
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +using namespace brw; + +namespace brw { + +static void +assign(int *reg_hw_locations, reg *reg) +{ + if (reg->file == GRF) { + reg->reg = reg_hw_locations[reg->reg]; + } +} + +void +vec4_visitor::reg_allocate_trivial() +{ + int last_grf = 0; + int hw_reg_mapping[this->virtual_grf_count]; + int i; + int next; + + /* Note that compressed instructions require alignment to 2 registers. */ + hw_reg_mapping[0] = this->first_non_payload_grf; + next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; + for (i = 1; i < this->virtual_grf_count; i++) { + hw_reg_mapping[i] = next; + next += this->virtual_grf_sizes[i]; + } + prog_data->total_grf = next; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + assign(hw_reg_mapping, &inst->dst); + assign(hw_reg_mapping, &inst->src[0]); + assign(hw_reg_mapping, &inst->src[1]); + assign(hw_reg_mapping, &inst->src[2]); + } + + if (last_grf >= BRW_MAX_GRF) { + fail("Ran out of regs on trivial allocator (%d/%d)\n", + last_grf, BRW_MAX_GRF); + } +} + +void +vec4_visitor::reg_allocate() +{ + reg_allocate_trivial(); +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp new file mode 100644 index 00000000000..bba1d810f19 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -0,0 +1,1649 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +#include "main/macros.h" + +namespace brw { + +src_reg::src_reg(dst_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + + int swizzles[4]; + int next_chan = 0; + int last = 0; + + for (int i = 0; i < 4; i++) { + if (!(reg.writemask & (1 << i))) + continue; + + swizzles[next_chan++] = last = i; + } + + for (; next_chan < 4; next_chan++) { + swizzles[next_chan] = last; + } + + this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); +} + +dst_reg::dst_reg(src_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + this->writemask = WRITEMASK_XYZW; +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2) +{ + vec4_instruction *inst = new(mem_ctx) vec4_instruction(); + + inst->opcode = opcode; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + inst->ir = this->base_ir; + inst->annotation = this->current_annotation; + + this->instructions.push_tail(inst); + + return inst; +} + + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) +{ + return emit(opcode, dst, src0, 
src1, src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) +{ + assert(dst.writemask != 0); + return emit(opcode, dst, src0, src_reg(), src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode) +{ + return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); +} + +void +vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) +{ + static enum opcode dot_opcodes[] = { + BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 + }; + + emit(dot_opcodes[elements - 2], dst, src0, src1); +} + +void +vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) +{ + /* The gen6 math instruction ignores the source modifiers -- + * swizzle, abs, negate, and at least some parts of the register + * region description. Move the source to the corresponding slots + * of the destination generally work. + */ + src_reg expanded = src_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_MOV, dst, src); + src = expanded; + + emit(opcode, dst, src); +} + +void +vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) +{ + vec4_instruction *inst = emit(opcode, dst, src); + inst->base_mrf = 1; + inst->mlen = 1; +} + +void +vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) +{ + switch (opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + break; + default: + assert(!"not reached: bad math opcode"); + return; + } + + if (intel->gen >= 6) { + return emit_math1_gen6(opcode, dst, src); + } else { + return emit_math1_gen4(opcode, dst, src); + } +} + +void +vec4_visitor::emit_math2_gen6(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + src_reg expanded; + + /* The gen6 math instruction ignores the source modifiers -- + * swizzle, abs, negate, and at least some parts of the register + * region 
description. Move the sources to temporaries to make it + * generally work. + */ + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst, src0); + src0 = expanded; + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst, src1); + src1 = expanded; + + emit(opcode, dst, src0, src1); +} + +void +vec4_visitor::emit_math2_gen4(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + vec4_instruction *inst = emit(opcode, dst, src0, src1); + inst->base_mrf = 1; + inst->mlen = 2; +} + +void +vec4_visitor::emit_math(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + assert(opcode == SHADER_OPCODE_POW); + + if (intel->gen >= 6) { + return emit_math2_gen6(opcode, dst, src0, src1); + } else { + return emit_math2_gen4(opcode, dst, src0, src1); + } +} + +void +vec4_visitor::visit_instructions(const exec_list *list) +{ + foreach_iter(exec_list_iterator, iter, *list) { + ir_instruction *ir = (ir_instruction *)iter.get(); + + base_ir = ir; + ir->accept(this); + } +} + + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + assert(type->length > 0); + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up one slot in UNIFORMS[], but they're baked in + * at link time. 
+ */ + return 1; + default: + assert(0); + return 0; + } +} + +int +vec4_visitor::virtual_grf_alloc(int size) +{ + if (virtual_grf_array_size <= virtual_grf_count) { + if (virtual_grf_array_size == 0) + virtual_grf_array_size = 16; + else + virtual_grf_array_size *= 2; + virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, + virtual_grf_array_size); + } + virtual_grf_sizes[virtual_grf_count] = size; + return virtual_grf_count++; +} + +src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->swizzle = BRW_SWIZZLE_NOOP; + } else { + this->swizzle = swizzle_for_size(type->vector_elements); + } + + this->type = brw_type_for_base_type(type); +} + +dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->writemask = WRITEMASK_XYZW; + } else { + this->writemask = (1 << type->vector_elements) - 1; + } + + this->type = brw_type_for_base_type(type); +} + +dst_reg * +vec4_visitor::variable_storage(ir_variable *var) +{ + return (dst_reg *)hash_table_find(this->variable_ht, var); +} + +void +vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) +{ + ir_expression *expr = ir->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + break; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = 
BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_or: + inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_and: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_f2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_i2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + break; + + default: + assert(!"not reached"); + break; + } + return; + } + + ir->accept(this); + + if (intel->gen >= 6) { + vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), + this->result, src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } +} + +/** + * Emit a gen6 IF statement with the comparison folded into the IF + * instruction. 
+ */ +void +vec4_visitor::emit_if_gen6(ir_if *ir) +{ + ir_expression *expr = ir->condition->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + dst_reg temp; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + return; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_or: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_OR, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_and: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_AND, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_f2b: + inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_i2b: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + return; + default: + assert(!"not reached"); + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + } + 
return; + } + + ir->condition->accept(this); + + vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), + this->result, src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; +} + +void +vec4_visitor::visit(ir_variable *ir) +{ + dst_reg *reg = NULL; + + if (variable_storage(ir)) + return; + + switch (ir->mode) { + case ir_var_in: + reg = new(mem_ctx) dst_reg(ATTR, ir->location); + reg->type = brw_type_for_base_type(ir->type); + hash_table_insert(this->variable_ht, reg, ir); + break; + + case ir_var_out: + reg = new(mem_ctx) dst_reg(this, ir->type); + hash_table_insert(this->variable_ht, reg, ir); + + for (int i = 0; i < type_size(ir->type); i++) { + output_reg[ir->location + i] = *reg; + output_reg[ir->location + i].reg_offset = i; + } + break; + + case ir_var_temporary: + reg = new(mem_ctx) dst_reg(this, ir->type); + hash_table_insert(this->variable_ht, reg, ir); + + break; + + case ir_var_uniform: + /* FINISHME: uniforms */ + break; + } +} + +void +vec4_visitor::visit(ir_loop *ir) +{ + ir_dereference_variable *counter = NULL; + + /* We don't want debugging output to print the whole body of the + * loop as the annotation. 
+ */ + this->base_ir = NULL; + + if (ir->counter != NULL) + counter = new(ir) ir_dereference_variable(ir->counter); + + if (ir->from != NULL) { + assert(ir->counter != NULL); + + ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + + a->accept(this); + delete a; + } + + emit(BRW_OPCODE_DO); + + if (ir->to) { + ir_expression *e = + new(ir) ir_expression(ir->cmp, glsl_type::bool_type, + counter, ir->to); + ir_if *if_stmt = new(ir) ir_if(e); + + ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + + if_stmt->then_instructions.push_tail(brk); + + if_stmt->accept(this); + + delete if_stmt; + delete e; + delete brk; + } + + visit_instructions(&ir->body_instructions); + + if (ir->increment) { + ir_expression *e = + new(ir) ir_expression(ir_binop_add, counter->type, + counter, ir->increment); + + ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); + + a->accept(this); + delete a; + delete e; + } + + emit(BRW_OPCODE_WHILE); +} + +void +vec4_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(BRW_OPCODE_BREAK); + break; + case ir_loop_jump::jump_continue: + emit(BRW_OPCODE_CONTINUE); + break; + } +} + + +void +vec4_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +vec4_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined. 
+ */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + visit_instructions(&sig->body); + } +} + +GLboolean +vec4_visitor::try_emit_sat(ir_expression *ir) +{ + ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); + if (!sat_src) + return false; + + sat_src->accept(this); + src_reg src = this->result; + + this->result = src_reg(this, ir->type); + vec4_instruction *inst; + inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); + inst->saturate = true; + + return true; +} + +void +vec4_visitor::emit_bool_comparison(unsigned int op, + dst_reg dst, src_reg src0, src_reg src1) +{ + /* original gen4 does destination conversion before comparison. */ + if (intel->gen < 5) + dst.type = src0.type; + + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); + inst->conditional_mod = brw_conditional_for_comparison(op); + + dst.type = BRW_REGISTER_TYPE_D; + emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); +} + +void +vec4_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + src_reg op[Elements(ir->operands)]; + src_reg result_src; + dst_reg result_dst; + vec4_instruction *inst; + + if (try_emit_sat(ir)) + return; + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = BAD_FILE; + ir->operands[operand]->accept(this); + if (this->result.file == BAD_FILE) { + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->print(); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + int vector_elements = ir->operands[0]->type->vector_elements; + if (ir->operands[1]) { + vector_elements = MAX2(vector_elements, + ir->operands[1]->type->vector_elements); + } + + this->result.file = BAD_FILE; + + /* Storage for our result. 
Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = src_reg(this, ir->type); + /* convenience for the emit functions below. */ + result_dst = dst_reg(result_src); + /* If nothing special happens, this is the result. */ + this->result = result_src; + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + /* Note that BRW_OPCODE_NOT is not appropriate here, since it is + * ones complement of the whole register, not just bit 0. + */ + emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); + break; + case ir_unop_neg: + op[0].negate = !op[0].negate; + this->result = op[0]; + break; + case ir_unop_abs: + op[0].abs = true; + op[0].negate = false; + this->result = op[0]; + break; + + case ir_unop_sign: + emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_G; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_L; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + break; + + case ir_unop_rcp: + emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); + break; + case ir_unop_log2: + emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_sin: + case ir_unop_sin_reduced: + emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + case 
ir_unop_cos_reduced: + emit_math(SHADER_OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + case ir_unop_dFdy: + assert(!"derivatives not valid in vertex shader"); + break; + + case ir_unop_noise: + assert(!"not reached: should be handled by lower_noise"); + break; + + case ir_binop_add: + emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); + break; + case ir_binop_sub: + assert(!"not reached: should be handled by ir_sub_to_add_neg"); + break; + + case ir_binop_mul: + emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); + emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); + break; + } + + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. 
*/ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + + case ir_unop_any: + emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + break; + + case ir_binop_logic_xor: + emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: + emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_and: + emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); + break; + case ir_unop_rsq: + emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_i2u: + case ir_unop_u2i: + case ir_unop_u2f: + case ir_unop_b2f: + case ir_unop_b2i: + case ir_unop_f2i: + 
emit(BRW_OPCODE_MOV, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); + break; + } + + case ir_unop_trunc: + emit(BRW_OPCODE_RNDZ, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = !op[0].negate; + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + this->result.negate = true; + break; + case ir_unop_floor: + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + break; + case ir_unop_fract: + inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); + break; + case ir_unop_round_even: + emit(BRW_OPCODE_RNDE, result_dst, op[0]); + break; + + case ir_binop_min: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + case ir_binop_max: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_G; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + + case ir_binop_pow: + emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); + break; + case ir_binop_bit_and: + inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + case ir_binop_bit_xor: + inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + case ir_binop_bit_or: + inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_lshift: + case ir_binop_rshift: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + 
break; + } +} + + +void +vec4_visitor::visit(ir_swizzle *ir) +{ + src_reg src; + int i = 0; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. + */ + + ir->val->accept(this); + src = this->result; + assert(src.file != BAD_FILE); + + if (i < ir->type->vector_elements) { + switch (i) { + case 0: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } + for (; i < 4; i++) { + /* Replicate the last channel out. */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + + src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_variable *ir) +{ + dst_reg *reg = variable_storage(ir->var); + + if (!reg) { + fail("Failed to find variable storage for %s\n", ir->var->name); + this->result = src_reg(brw_null_reg()); + return; + } + + this->result = src_reg(*reg); +} + +void +vec4_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *constant_index; + src_reg src; + int element_size = type_size(ir->type); + + constant_index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (constant_index) { + src.reg_offset += constant_index->value.i[0] * element_size; + } else { +#if 0 /* Variable array index */ + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the Mesa register + * index. 
+ */ + ir->array_index->accept(this); + + src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = src_reg(this, glsl_type::float_type); + + emit(BRW_OPCODE_MUL, dst_reg(index_reg), + this->result, src_reg_for_float(element_size)); + } + + src.reladdr = ralloc(mem_ctx, src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); +#endif + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = BRW_SWIZZLE_NOOP; + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = BRW_SWIZZLE_NOOP; + + this->result.reg_offset += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static dst_reg +get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) +{ + /* The LHS must be a dereference. If the LHS is a variable indexed array + * access of a vector, it must be separated into a series conditional moves + * before reaching this point (see ir_vec_index_to_cond_assign). 
+ */ + assert(ir->as_dereference()); + ir_dereference_array *deref_array = ir->as_dereference_array(); + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + return dst_reg(v->result); +} + +void +vec4_visitor::emit_block_move(ir_assignment *ir) +{ + ir->rhs->accept(this); + src_reg src = this->result; + + dst_reg dst = get_assignment_lhs(ir->lhs, this); + + /* FINISHME: This should really set to the correct maximal writemask for each + * FINISHME: component written (in the loops below). + */ + dst.writemask = WRITEMASK_XYZW; + + for (int i = 0; i < type_size(ir->lhs->type); i++) { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); + if (ir->condition) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst.reg_offset++; + src.reg_offset++; + } +} + +void +vec4_visitor::visit(ir_assignment *ir) +{ + if (!ir->lhs->type->is_scalar() && + !ir->lhs->type->is_vector()) { + emit_block_move(ir); + return; + } + + /* Now we're down to just a scalar/vector with writemasks. */ + int i; + + ir->rhs->accept(this); + src_reg src = this->result; + + dst_reg dst = get_assignment_lhs(ir->lhs, this); + + int swizzles[4]; + int first_enabled_chan = 0; + int src_chan = 0; + + assert(ir->lhs->type->is_vector()); + dst.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while in our instructions we need to make + * those channels appear in the slots of the vec4 they're written to. 
+ */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) + swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); + else + swizzles[i] = first_enabled_chan; + } + src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + for (i = 0; i < type_size(ir->lhs->type); i++) { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); + + if (ir->condition) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst.reg_offset++; + src.reg_offset++; + } +} + + +void +vec4_visitor::visit(ir_constant *ir) +{ + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + src_reg temp_base = src_reg(this, ir->type); + dst_reg temp = dst_reg(temp_base); + + foreach_iter(exec_list_iterator, iter, ir->components) { + ir_constant *field_value = (ir_constant *)iter.get(); + int size = type_size(field_value->type); + + assert(size > 0); + + field_value->accept(this); + src_reg src = this->result; + + for (int i = 0; i < (unsigned int)size; i++) { + emit(BRW_OPCODE_MOV, temp, src); + + src.reg_offset++; + temp.reg_offset++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_array()) { + src_reg temp_base = src_reg(this, ir->type); + dst_reg temp = dst_reg(temp_base); + int size = type_size(ir->type->fields.array); + + assert(size > 0); + + for (unsigned int i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + src_reg src = this->result; + for (int j = 0; j < size; j++) { + emit(BRW_OPCODE_MOV, temp, src); + + src.reg_offset++; + temp.reg_offset++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_matrix()) { + this->result = src_reg(this, ir->type); + dst_reg dst = dst_reg(this->result); + + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + + for (int i = 0; i < ir->type->matrix_columns; i++) { + for (int j = 0; j < ir->type->vector_elements; j++) { + dst.writemask = 1 << j; + emit(BRW_OPCODE_MOV, dst, + src_reg(ir->value.f[i * 
ir->type->vector_elements + j])); + } + dst.reg_offset++; + } + return; + } + + for (int i = 0; i < ir->type->vector_elements; i++) { + this->result = src_reg(this, ir->type); + dst_reg dst = dst_reg(this->result); + + dst.writemask = 1 << i; + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i])); + break; + case GLSL_TYPE_INT: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i])); + break; + case GLSL_TYPE_UINT: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i])); + break; + case GLSL_TYPE_BOOL: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i])); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + break; + } + } +} + +void +vec4_visitor::visit(ir_call *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_texture *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_return *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_discard *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_if *ir) +{ + this->base_ir = ir->condition; + ir->condition->accept(this); + assert(this->result.file != BAD_FILE); + + /* FINISHME: condcode */ + emit(BRW_OPCODE_IF); + + visit_instructions(&ir->then_instructions); + + if (!ir->else_instructions.is_empty()) { + this->base_ir = ir->condition; + emit(BRW_OPCODE_ELSE); + + visit_instructions(&ir->else_instructions); + } + + this->base_ir = ir->condition; + emit(BRW_OPCODE_ENDIF); +} + +int +vec4_visitor::emit_vue_header_gen4(int header_mrf) +{ + /* Get the position */ + src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); + + /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ + dst_reg ndc = dst_reg(this, glsl_type::vec4_type); + + current_annotation = "NDC"; + dst_reg ndc_w = ndc; + ndc_w.writemask = WRITEMASK_W; + src_reg pos_w = pos; + pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); + emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); + + dst_reg ndc_xyz = ndc; + 
ndc_xyz.writemask = WRITEMASK_XYZ; + + emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); + + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || + c->key.nr_userclip || brw->has_negative_rhw_bug) { + dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); + GLuint i; + + emit(BRW_OPCODE_MOV, header1, 0u); + + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + assert(!"finishme: psiz"); + src_reg psiz; + + header1.writemask = WRITEMASK_W; + emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); + emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); + } + + for (i = 0; i < c->key.nr_userclip; i++) { + vec4_instruction *inst; + + inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), + pos, src_reg(c->userplane[i])); + inst->conditional_mod = BRW_CONDITIONAL_L; + + emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + /* i965 clipping workaround: + * 1) Test for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. + */ + if (brw->has_negative_rhw_bug) { +#if 0 + /* FINISHME */ + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif + } + + header1.writemask = WRITEMASK_XYZW; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); + } else { + emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), + BRW_REGISTER_TYPE_UD), 0u); + } + + if (intel->gen == 5) { + /* There are 20 DWs (D0-D19) in VUE header on Ironlake: + * dword 0-3 (m1) of the header is indices, point width, clip flags. 
+ * dword 4-7 (m2) is the ndc position (set above) + * dword 8-11 (m3) of the vertex header is the 4D space position + * dword 12-19 (m4,m5) of the vertex header is the user clip distance. + * m6 is a pad so that the vertex element data is aligned + * m7 is the first vertex data we fill, which is the vertex position. + */ + current_annotation = "NDC"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + + /* user clip distance. */ + header_mrf += 2; + + /* Pad so that vertex element data (starts with position) is aligned. */ + header_mrf++; + } else { + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 (m1) is indices, point width, clip flags. + * dword 4-7 (m2) is ndc position (set above) + * + * dword 8-11 (m3) is the first vertex data, which we always have be the + * vertex position. + */ + current_annotation = "NDC"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + } + + return header_mrf; +} + +int +vec4_visitor::emit_vue_header_gen6(int header_mrf) +{ + struct brw_reg reg; + + /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 (m2) of the header is indices, point width, clip flags. + * dword 4-7 (m3) is the 4D space position + * dword 8-15 (m4,m5) of the vertex header is the user clip distance if + * enabled. + * + * m4 or 6 is the first vertex element data we fill, which is + * the vertex position. 
+ */ + + current_annotation = "indices, point width, clip flags"; + reg = brw_message_reg(header_mrf++); + emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), + src_reg(output_reg[VERT_RESULT_PSIZ])); + } + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, + brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); + + current_annotation = "user clip distances"; + if (c->key.nr_userclip) { + for (int i = 0; i < c->key.nr_userclip; i++) { + struct brw_reg m; + if (i < 4) + m = brw_message_reg(header_mrf); + else + m = brw_message_reg(header_mrf + 1); + + emit(BRW_OPCODE_DP4, + dst_reg(brw_writemask(m, 1 << (i & 7))), + src_reg(c->userplane[i])); + } + header_mrf += 2; + } + + current_annotation = NULL; + + return header_mrf; +} + +static int +align_interleaved_urb_mlen(struct brw_context *brw, int mlen) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 6) { + /* URB data written (does not include the message header reg) must + * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, + * section 5.4.3.2.2: URB_INTERLEAVED. + * + * URB entries are allocated on a multiple of 1024 bits, so an + * extra 128 bits written here to make the end align to 256 is + * no problem. + */ + if ((mlen % 2) != 1) + mlen++; + } + + return mlen; +} + +/** + * Generates the VUE payload plus the 1 or 2 URB write instructions to + * complete the VS thread. + * + * The VUE layout is documented in Volume 2a. + */ +void +vec4_visitor::emit_urb_writes() +{ + int base_mrf = 1; + int mrf = base_mrf; + int urb_entry_size; + + /* FINISHME: edgeflag */ + + /* First mrf is the g0-based message header containing URB handles and such, + * which is implied in VS_OPCODE_URB_WRITE. 
+ */ + mrf++; + + if (intel->gen >= 6) { + mrf = emit_vue_header_gen6(mrf); + } else { + mrf = emit_vue_header_gen4(mrf); + } + + int attr; + for (attr = 0; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + /* This is loaded into the VUE header, and thus doesn't occupy + * an attribute slot. + */ + if (attr == VERT_RESULT_PSIZ) + continue; + + emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + + /* If this is MRF 15, we can't fit anything more into this URB + * WRITE. Note that base_mrf of 1 means that MRF 15 is an + * even-numbered amount of URB write data, which will meet + * gen6's requirements for length alignment. + */ + if (mrf == 15) + break; + } + + vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = true; + + urb_entry_size = mrf - base_mrf; + + for (; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + fail("Second URB write not supported.\n"); + break; + } + + if (intel->gen == 6) + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; + else + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; +} + +vec4_visitor::vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, + struct brw_shader *shader) +{ + this->c = c; + this->p = &c->func; + this->brw = p->brw; + this->intel = &brw->intel; + this->ctx = &intel->ctx; + this->prog = prog; + this->shader = shader; + + this->mem_ctx = ralloc_context(NULL); + this->failed = false; + + this->base_ir = NULL; + this->current_annotation = NULL; + + this->c = c; + this->prog_data = &c->prog_data; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); + + this->virtual_grf_sizes = NULL; + this->virtual_grf_count = 0; + this->virtual_grf_array_size = 0; +} + +vec4_visitor::~vec4_visitor() +{ + 
hash_table_dtor(this->variable_ht); +} + + +void +vec4_visitor::fail(const char *format, ...) +{ + va_list va; + char *msg; + + if (failed) + return; + + failed = true; + + va_start(va, format); + msg = ralloc_vasprintf(mem_ctx, format, va); + va_end(va); + msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); + + this->fail_msg = msg; + + if (INTEL_DEBUG & DEBUG_VS) { + fprintf(stderr, "%s", msg); + } +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index a9ad5311fe3..bd0677db151 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -30,6 +30,7 @@ */ +#include "main/compiler.h" #include "brw_context.h" #include "brw_vs.h" #include "brw_util.h" @@ -50,6 +51,7 @@ static void do_vs_prog( struct brw_context *brw, void *mem_ctx; int aux_size; int i; + static int new_vs = -1; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -85,7 +87,15 @@ static void do_vs_prog( struct brw_context *brw, /* Emit GEN4 code. 
*/ - brw_vs_emit(&c); + if (new_vs == -1) + new_vs = getenv("INTEL_NEW_VS") != NULL; + + if (new_vs) { + if (!brw_vs_emit(&c)) + brw_old_vs_emit(&c); + } else { + brw_old_vs_emit(&c); + } /* get the program */ diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 432994a8534..9f9fed33970 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -92,6 +92,7 @@ struct brw_vs_compile { GLboolean needs_stack; }; -void brw_vs_emit( struct brw_vs_compile *c ); +bool brw_vs_emit(struct brw_vs_compile *c); +void brw_old_vs_emit(struct brw_vs_compile *c); #endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index dbabb44e45c..a06a2bbec52 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1903,7 +1903,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c) /* Emit the vertex program instructions here. */ -void brw_vs_emit(struct brw_vs_compile *c ) +void brw_old_vs_emit(struct brw_vs_compile *c ) { #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 From a070d5f363e99b0f846d555e9ca3a74ec807fdc0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 May 2011 12:50:16 -0700 Subject: [PATCH 357/600] i965/vs: Start adding support for uniforms There's no clever packing here, no pull constants, and no array support. 
--- src/mesa/drivers/dri/i965/brw_context.h | 22 ++- src/mesa/drivers/dri/i965/brw_curbe.c | 27 ++-- src/mesa/drivers/dri/i965/brw_vec4.h | 5 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 75 ++++++--- .../drivers/dri/i965/brw_vec4_visitor.cpp | 150 +++++++++++++++++- .../drivers/dri/i965/brw_wm_surface_state.c | 2 +- src/mesa/drivers/dri/i965/gen6_vs_state.c | 21 ++- src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_wm_state.c | 2 +- 9 files changed, 257 insertions(+), 49 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7b6b64c1a5c..4a1abd6252e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -248,6 +248,7 @@ enum param_conversion { PARAM_CONVERT_F2I, PARAM_CONVERT_F2U, PARAM_CONVERT_F2B, + PARAM_CONVERT_ZERO, }; /* Data about a particular attempt to compile a program. Note that @@ -317,6 +318,13 @@ struct brw_vs_prog_data { /* Used for calculating urb partitions: */ GLuint urb_entry_size; + + const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ + enum param_conversion param_convert[MAX_UNIFORMS * 4]; + const float *pull_param[MAX_UNIFORMS * 4]; + enum param_conversion pull_param_convert[MAX_UNIFORMS * 4]; + + bool uses_new_param_layout; }; @@ -898,7 +906,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p) } static inline -float convert_param(enum param_conversion conversion, float param) +float convert_param(enum param_conversion conversion, const float *param) { union { float f; @@ -908,21 +916,23 @@ float convert_param(enum param_conversion conversion, float param) switch (conversion) { case PARAM_NO_CONVERT: - return param; + return *param; case PARAM_CONVERT_F2I: - fi.i = param; + fi.i = *param; return fi.f; case PARAM_CONVERT_F2U: - fi.u = param; + fi.u = *param; return fi.f; case PARAM_CONVERT_F2B: - if (param != 0.0) + if (*param != 0.0) fi.i = 1; else fi.i = 0; return fi.f; + 
case PARAM_CONVERT_ZERO: + return 0.0; default: - return param; + return *param; } } diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index ae11c487a2c..960be10006e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw) /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) { buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } } @@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - /* Load the subset of push constants that will get used when - * we also have a pull constant buffer. - */ - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - assert(brw->vs.constant_map[i] <= nr); - memcpy(buf + offset + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + } + } else { + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. 
+ */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 10168fc1cb0..01058243f04 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -356,6 +356,8 @@ public: * for the ir->location's used. */ dst_reg output_reg[VERT_RESULT_MAX]; + int uniform_size[MAX_UNIFORMS]; + int uniforms; struct hash_table *variable_ht; @@ -363,7 +365,10 @@ public: void fail(const char *msg, ...); int virtual_grf_alloc(int size); + int setup_uniform_values(int loc, const glsl_type *type); + void setup_builtin_uniform_values(ir_variable *ir); int setup_attributes(int payload_reg); + int setup_uniforms(int payload_reg); void setup_payload(); void reg_allocate_trivial(); void reg_allocate(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index bdc7a79d83d..1f2853e1118 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -67,20 +67,12 @@ vec4_visitor::setup_attributes(int payload_reg) prog_data->urb_read_length = (nr_attributes + 1) / 2; - return nr_attributes; + return payload_reg + nr_attributes; } -void -vec4_visitor::setup_payload(void) +int +vec4_visitor::setup_uniforms(int reg) { - int reg = 0; - - /* r0 is always reserved, as it contains the payload with the URB - * handles that are passed on to the URB write at the end of the - * thread. - */ - reg++; - /* User clip planes from curbe: */ if (c->key.nr_userclip) { @@ -99,14 +91,49 @@ vec4_visitor::setup_payload(void) } } - /* FINISHME: push constants */ - c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = 0; - /* XXX 0 causes a bug elsewhere... 
*/ - if (intel->gen < 6 && c->prog_data.nr_params == 0) - c->prog_data.nr_params = 4; + /* The pre-gen6 VS requires that some push constants get loaded no + * matter what, or the GPU would hang. + */ + if (this->uniforms == 0) { + this->uniform_size[this->uniforms] = 1; - reg += setup_attributes(reg); + for (unsigned int i = 0; i < 4; i++) { + unsigned int slot = this->uniforms * 4 + i; + + c->prog_data.param[slot] = NULL; + c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO; + } + + this->uniforms++; + } else { + reg += ALIGN(uniforms, 2) / 2; + } + + /* for now, we are not doing any elimination of unused slots, nor + * are we packing our uniforms. + */ + c->prog_data.nr_params = this->uniforms * 4; + + c->prog_data.curb_read_length = reg - 1; + c->prog_data.uses_new_param_layout = true; + + return reg; +} + +void +vec4_visitor::setup_payload(void) +{ + int reg = 0; + + /* The payload always contains important data in g0, which contains + * the URB handles that are passed on to the URB write at the end + * of the thread. So, we always start push constants at g1. 
+ */ + reg++; + + reg = setup_uniforms(reg); + + reg = setup_attributes(reg); this->first_non_payload_grf = reg; } @@ -174,6 +201,18 @@ vec4_instruction::get_src(int i) } break; + case UNIFORM: + brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2, + ((src[i].reg + src[i].reg_offset) % 2) * 4), + 0, 4, 1); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + case HW_REG: brw_reg = src[i].fixed_hw_reg; break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index bba1d810f19..91abd40faad 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -22,7 +22,10 @@ */ #include "brw_vec4.h" +extern "C" { #include "main/macros.h" +#include "program/prog_parameter.h" +} namespace brw { @@ -306,6 +309,130 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) this->type = brw_type_for_base_type(type); } +/* Our support for uniforms is piggy-backed on the struct + * gl_fragment_program, because that's where the values actually + * get stored, rather than in some global gl_shader_program uniform + * store. 
+ */ +int +vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) +{ + unsigned int offset = 0; + float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; + + if (type->is_matrix()) { + const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, + 1); + + for (unsigned int i = 0; i < type->matrix_columns; i++) { + offset += setup_uniform_values(loc + offset, column); + } + + return offset; + } + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + for (unsigned int i = 0; i < type->vector_elements; i++) { + int slot = this->uniforms * 4 + i; + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + case GLSL_TYPE_UINT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; + break; + case GLSL_TYPE_INT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; + break; + case GLSL_TYPE_BOOL: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; + break; + default: + assert(!"not reached"); + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + } + c->prog_data.param[slot] = &values[i]; + } + + for (unsigned int i = type->vector_elements; i < 4; i++) { + c->prog_data.param_convert[this->uniforms * 4 + i] = + PARAM_CONVERT_ZERO; + c->prog_data.param[this->uniforms * 4 + i] = NULL; + } + + this->uniform_size[this->uniforms] = type->vector_elements; + this->uniforms++; + + return 1; + + case GLSL_TYPE_STRUCT: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, + type->fields.structure[i].type); + } + return offset; + + case GLSL_TYPE_ARRAY: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, type->fields.array); + } + return offset; + + case GLSL_TYPE_SAMPLER: + /* The sampler takes up a slot, but we don't use any values from it. 
*/ + return 1; + + default: + assert(!"not reached"); + return 0; + } +} + +/* Our support for builtin uniforms is even scarier than non-builtin. + * It sits on top of the PROG_STATE_VAR parameters that are + * automatically updated from GL context state. + */ +void +vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) +{ + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); + + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + /* This state reference has already been setup by ir_to_mesa, + * but we'll get the same index back here. We can reference + * ParameterValues directly, since unlike brw_fs.cpp, we never + * add new state references during compile. + */ + int index = _mesa_add_state_reference(this->vp->Base.Parameters, + (gl_state_index *)slots[i].tokens); + float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; + + this->uniform_size[this->uniforms] = 0; + /* Add each of the unique swizzled channels of the element. + * This will end up matching the size of the glsl_type of this field. 
+ */ + int last_swiz = -1; + for (unsigned int j = 0; j < 4; j++) { + int swiz = GET_SWZ(slots[i].swizzle, j); + if (swiz == last_swiz) + break; + last_swiz = swiz; + + c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; + c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; + this->uniform_size[this->uniforms]++; + } + this->uniforms++; + } +} + dst_reg * vec4_visitor::variable_storage(ir_variable *var) { @@ -496,13 +623,10 @@ vec4_visitor::visit(ir_variable *ir) switch (ir->mode) { case ir_var_in: reg = new(mem_ctx) dst_reg(ATTR, ir->location); - reg->type = brw_type_for_base_type(ir->type); - hash_table_insert(this->variable_ht, reg, ir); break; case ir_var_out: reg = new(mem_ctx) dst_reg(this, ir->type); - hash_table_insert(this->variable_ht, reg, ir); for (int i = 0; i < type_size(ir->type); i++) { output_reg[ir->location + i] = *reg; @@ -512,14 +636,21 @@ vec4_visitor::visit(ir_variable *ir) case ir_var_temporary: reg = new(mem_ctx) dst_reg(this, ir->type); - hash_table_insert(this->variable_ht, reg, ir); - break; case ir_var_uniform: - /* FINISHME: uniforms */ + reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); + + if (!strncmp(ir->name, "gl_", 3)) { + setup_builtin_uniform_values(ir); + } else { + setup_uniform_values(ir->location, ir->type); + } break; } + + reg->type = brw_type_for_base_type(ir->type); + hash_table_insert(this->variable_ht, reg, ir); } void @@ -1606,6 +1737,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, this->current_annotation = NULL; this->c = c; + this->vp = brw->vertex_program; /* FINISHME: change for precompile */ this->prog_data = &c->prog_data; this->variable_ht = hash_table_ctor(0, @@ -1615,6 +1747,12 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, this->virtual_grf_sizes = NULL; this->virtual_grf_count = 0; this->virtual_grf_array_size = 0; + + this->uniforms = 0; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); } 
vec4_visitor::~vec4_visitor() diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index fb4fb146f8d..ad909789d82 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw) constants = brw->wm.const_bo->virtual; for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) { constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i], - *brw->wm.prog_data->pull_param[i]); + brw->wm.prog_data->pull_param[i]); } drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index e70454416bf..affa72c7324 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) params_uploaded++; } - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - memcpy(param + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - params_uploaded++; + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + *param = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + param++; + } + params_uploaded += brw->vs.prog_data->nr_params / 4; + } else { + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + params_uploaded++; + } } } diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 3d525248f25..07e9995f53b 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c 
@@ -61,7 +61,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index a102ca772b3..1d80e96778e 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { From 83d5850518388202c5589d3181b84fb54c213fb1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:18:00 -0700 Subject: [PATCH 358/600] i965/vs: Fix constant vector construction. Fixes some issues noticed in glsl-vs-all-01. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 91abd40faad..8ee4884098c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1391,10 +1391,10 @@ vec4_visitor::visit(ir_constant *ir) return; } - for (int i = 0; i < ir->type->vector_elements; i++) { - this->result = src_reg(this, ir->type); - dst_reg dst = dst_reg(this->result); + this->result = src_reg(this, ir->type); + dst_reg dst = dst_reg(this->result); + for (int i = 0; i < ir->type->vector_elements; i++) { dst.writemask = 1 << i; switch (ir->type->base_type) { From 4a4857246c79c42d918a84d7e28e9afff3a9ef6d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:23:42 -0700 Subject: [PATCH 359/600] i965/vs: Port the fix for clip plane writemasks from brw_vs_emit.c. 
--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 8ee4884098c..439969ab7ea 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1618,7 +1618,7 @@ vec4_visitor::emit_vue_header_gen6(int header_mrf) m = brw_message_reg(header_mrf + 1); emit(BRW_OPCODE_DP4, - dst_reg(brw_writemask(m, 1 << (i & 7))), + dst_reg(brw_writemask(m, 1 << (i & 3))), src_reg(c->userplane[i])); } header_mrf += 2; From c0f334a3ed3c6645abd1812e39cd52f1dfa32fa1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:29:48 -0700 Subject: [PATCH 360/600] i965/vs: Don't emit an extra copy of the vertex position. Fixes glsl-vs-abs-neg, glsl-vs-all-01, and probably many other tests. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 439969ab7ea..c4a3c8a8667 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1550,7 +1550,7 @@ vec4_visitor::emit_vue_header_gen4(int header_mrf) * dword 8-11 (m3) of the vertex header is the 4D space position * dword 12-19 (m4,m5) of the vertex header is the user clip distance. * m6 is a pad so that the vertex element data is aligned - * m7 is the first vertex data we fill, which is the vertex position. + * m7 is the first vertex data we fill. */ current_annotation = "NDC"; emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); @@ -1561,15 +1561,14 @@ vec4_visitor::emit_vue_header_gen4(int header_mrf) /* user clip distance. */ header_mrf += 2; - /* Pad so that vertex element data (starts with position) is aligned. */ + /* Pad so that vertex element data is aligned. 
*/ header_mrf++; } else { /* There are 8 dwords in VUE header pre-Ironlake: * dword 0-3 (m1) is indices, point width, clip flags. * dword 4-7 (m2) is ndc position (set above) * - * dword 8-11 (m3) is the first vertex data, which we always have be the - * vertex position. + * dword 8-11 (m3) is the first vertex data. */ current_annotation = "NDC"; emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); @@ -1592,8 +1591,7 @@ vec4_visitor::emit_vue_header_gen6(int header_mrf) * dword 8-15 (m4,m5) of the vertex header is the user clip distance if * enabled. * - * m4 or 6 is the first vertex element data we fill, which is - * the vertex position. + * m4 or 6 is the first vertex element data we fill. */ current_annotation = "indices, point width, clip flags"; @@ -1681,6 +1679,10 @@ vec4_visitor::emit_urb_writes() if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) continue; + /* This is set up in the VUE header. */ + if (attr == VERT_RESULT_HPOS) + continue; + /* This is loaded into the VUE header, and thus doesn't occupy * an attribute slot. */ From 82aa9299fbfe92d2526fa9f7ffd2a1ebc7827ee9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:31:30 -0700 Subject: [PATCH 361/600] i965/vs: Allow scalar values in assignments, too. Fixes glsl-vs-all-02 and many other tests. 
--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index c4a3c8a8667..e3779ab0444 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1285,7 +1285,8 @@ vec4_visitor::visit(ir_assignment *ir) int first_enabled_chan = 0; int src_chan = 0; - assert(ir->lhs->type->is_vector()); + assert(ir->lhs->type->is_vector() || + ir->lhs->type->is_scalar()); dst.writemask = ir->write_mask; for (int i = 0; i < 4; i++) { From 78fac1892a3a7a90eb7baf78903d70649028d27a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:35:24 -0700 Subject: [PATCH 362/600] i965/vs: Allocate storage for "auto" variables just like temps. Fixes segfault in glsl-vs-cross-2. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index e3779ab0444..3e62c9ebba8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -634,6 +634,7 @@ vec4_visitor::visit(ir_variable *ir) } break; + case ir_var_auto: case ir_var_temporary: reg = new(mem_ctx) dst_reg(this, ir->type); break; @@ -647,6 +648,9 @@ vec4_visitor::visit(ir_variable *ir) setup_uniform_values(ir->location, ir->type); } break; + + default: + assert(!"not reached"); } reg->type = brw_type_for_base_type(ir->type); From bb468fc1ede9b0a5231ebfaa51df444502d33654 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:37:18 -0700 Subject: [PATCH 363/600] i965/vs: Fix ir_swizzle handling. I decided to refactor it a bit in adapting ir_to_mesa.cpp code, and mangled it. Fixes glsl-vs-cross-2. 
--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3e62c9ebba8..4f2a2011068 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1109,7 +1109,7 @@ vec4_visitor::visit(ir_swizzle *ir) src = this->result; assert(src.file != BAD_FILE); - if (i < ir->type->vector_elements) { + for (i = 0; i < ir->type->vector_elements; i++) { switch (i) { case 0: swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); From aa753c5a14637ede804e8043762693122174bf8c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:05:42 -0700 Subject: [PATCH 364/600] i965/vs: Disable loops for now until rendering is generally correct. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 4f2a2011068..c3b55db4ac1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -662,6 +662,8 @@ vec4_visitor::visit(ir_loop *ir) { ir_dereference_variable *counter = NULL; + fail("not yet\n"); + /* We don't want debugging output to print the whole body of the * loop as the annotation. */ From 164ccd27787e0df4ae6f85a7178aff0720d56ac9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:12:16 -0700 Subject: [PATCH 365/600] i965/vs: Fix support for "IF" instructions by copying brw_fs_visitor.cpp. Fixes glsl-vs-if-greater. 
--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index c3b55db4ac1..014f7e62284 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1451,12 +1451,18 @@ vec4_visitor::visit(ir_discard *ir) void vec4_visitor::visit(ir_if *ir) { + /* Don't point the annotation at the if statement, because then it plus + * the then and else blocks get printed. + */ this->base_ir = ir->condition; - ir->condition->accept(this); - assert(this->result.file != BAD_FILE); - /* FINISHME: condcode */ - emit(BRW_OPCODE_IF); + if (intel->gen == 6) { + emit_if_gen6(ir); + } else { + emit_bool_to_cond_code(ir->condition); + vec4_instruction *inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_NORMAL; + } visit_instructions(&ir->then_instructions); From eca762d831e099b549dafa0be896eac82b3fceb9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:18:31 -0700 Subject: [PATCH 366/600] i965/vs: Fix support for zero uniforms in use. We were looking for attributes in the wrong place, and pointlessly doing the work on gen6 at all. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 1f2853e1118..be089369bcf 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -94,7 +94,7 @@ vec4_visitor::setup_uniforms(int reg) /* The pre-gen6 VS requires that some push constants get loaded no * matter what, or the GPU would hang. 
*/ - if (this->uniforms == 0) { + if (intel->gen < 6 && this->uniforms == 0) { this->uniform_size[this->uniforms] = 1; for (unsigned int i = 0; i < 4; i++) { @@ -105,6 +105,7 @@ vec4_visitor::setup_uniforms(int reg) } this->uniforms++; + reg++; } else { reg += ALIGN(uniforms, 2) / 2; } From e5363c7fd2ed6318e86ba4a62adc0c2377e51eef Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:29:41 -0700 Subject: [PATCH 367/600] i965/vs: Use an appropriate swizzle on src regs from variables. Fixes glsl-vs-if-bool. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 014f7e62284..734e2514536 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1140,6 +1140,7 @@ vec4_visitor::visit(ir_swizzle *ir) void vec4_visitor::visit(ir_dereference_variable *ir) { + const struct glsl_type *type = ir->type; dst_reg *reg = variable_storage(ir->var); if (!reg) { @@ -1149,6 +1150,9 @@ vec4_visitor::visit(ir_dereference_variable *ir) } this->result = src_reg(*reg); + + if (type->is_scalar() || type->is_vector() || type->is_matrix()) + this->result.swizzle = swizzle_for_size(type->vector_elements); } void From 814a9bef30beda427e8fbf6f3b8abb6a45f0e2e4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:31:53 -0700 Subject: [PATCH 368/600] i965/vs: Drop the assertion about dst.reg_offset == 0. Adding the offset is the right thing to do here, and fixes glsl-vs-mat-add-1. 
--- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index be089369bcf..a41c58c7d52 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -146,7 +146,6 @@ vec4_instruction::get_dst(void) switch (dst.file) { case GRF: - assert(dst.reg_offset == 0); brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); brw_reg = retype(brw_reg, dst.type); brw_reg.dw1.bits.writemask = dst.writemask; From 8e947c2546c25c0dfa93b538e54113af1bf582df Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:38:44 -0700 Subject: [PATCH 369/600] i965/vs: Fix the types of array/struct dereferences. Fixes glsl-vs-arrays-3. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 734e2514536..b6f3cbc265f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1198,6 +1198,7 @@ vec4_visitor::visit(ir_dereference_array *ir) src.swizzle = swizzle_for_size(ir->type->vector_elements); else src.swizzle = BRW_SWIZZLE_NOOP; + src.type = brw_type_for_base_type(ir->type); this->result = src; } @@ -1222,6 +1223,7 @@ vec4_visitor::visit(ir_dereference_record *ir) this->result.swizzle = swizzle_for_size(ir->type->vector_elements); else this->result.swizzle = BRW_SWIZZLE_NOOP; + this->result.type = brw_type_for_base_type(ir->type); this->result.reg_offset += offset; } From c3752b399ab376aa53392afb8f2d4b526054f0a8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:40:46 -0700 Subject: [PATCH 370/600] i965/vs: Add support for dot product opcodes. Fixes glsl-vs-dot-vec2. 
--- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index a41c58c7d52..71caf907b38 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -447,6 +447,18 @@ vec4_visitor::generate_code() brw_SEL(p, dst, src[0], src[1]); break; + case BRW_OPCODE_DP4: + brw_DP4(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP3: + brw_DP3(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP2: + brw_DP2(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ From 2b7632aeaa5f8b4ab3da7d33a3c71c71023a072a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 20:03:31 -0700 Subject: [PATCH 371/600] i965/vs: Add support for if(any_nequal()) and if(all_equal()) on gen6. Fixes vs-temp-array-mat2-col-rd.shader_test. 
--- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 ++- .../drivers/dri/i965/brw_vec4_visitor.cpp | 23 ++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 71caf907b38..bc3110b0458 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -465,7 +465,8 @@ vec4_visitor::generate_code() assert(intel->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { - brw_IF(p, BRW_EXECUTE_8); + struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8); + brw_inst->header.predicate_control = inst->predicate; } if_depth_in_loop[loop_stack_depth]++; break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index b6f3cbc265f..4237373c13d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -543,7 +543,9 @@ vec4_visitor::emit_if_gen6(ir_if *ir) assert(expr->get_num_operands() <= 2); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); + assert(expr->operands[i]->type->is_scalar() || + expr->operation == ir_binop_any_nequal || + expr->operation == ir_binop_all_equal); expr->operands[i]->accept(this); op[i] = this->result; @@ -589,13 +591,28 @@ vec4_visitor::emit_if_gen6(ir_if *ir) case ir_binop_less: case ir_binop_lequal: case ir_binop_equal: - case ir_binop_all_equal: case ir_binop_nequal: - case ir_binop_any_nequal: inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); inst->conditional_mod = brw_conditional_for_comparison(expr->operation); return; + + case ir_binop_all_equal: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + return; + + case ir_binop_any_nequal: + inst = 
emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + return; + default: assert(!"not reached"); inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); From cda28bca0d789c328d19bf90afd35a5ff74cfb77 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 20:16:21 -0700 Subject: [PATCH 372/600] i965/vs: Apply the gen6 math workaround for math1 instructions. Fixes glsl-vs-masked-cos. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 4237373c13d..7e0535b5c02 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -122,14 +122,12 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) { /* The gen6 math instruction ignores the source modifiers -- * swizzle, abs, negate, and at least some parts of the register - * region description. Move the source to the corresponding slots - * of the destination generally work. + * region description. */ - src_reg expanded = src_reg(this, glsl_type::float_type); - emit(BRW_OPCODE_MOV, dst, src); - src = expanded; + src_reg temp_src = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); - emit(opcode, dst, src); + emit(opcode, dst, temp_src); } void From 930afd1774bdcd013bccbd7b5717ae0bb8e3dea3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 20:26:48 -0700 Subject: [PATCH 373/600] i965/vs: Don't forget to set up assignment condition code for arrays/structs. Fixes vs-uniform-array-mat2-index-col-rd. 
--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 7e0535b5c02..27620c47085 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1276,6 +1276,10 @@ vec4_visitor::emit_block_move(ir_assignment *ir) dst_reg dst = get_assignment_lhs(ir->lhs, this); + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + /* FINISHME: This should really set to the correct maximal writemask for each * FINISHME: component written (in the loops below). */ From 979072613139870f12e329e4b483c7f688b40560 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 20:46:03 -0700 Subject: [PATCH 374/600] i965/vs: Handle assignment of structures/arrays/matrices better. This gets the right types on the instructions, as well as emitting minimal swizzles/writemasks. --- src/mesa/drivers/dri/i965/brw_vec4.h | 3 +- .../drivers/dri/i965/brw_vec4_visitor.cpp | 75 +++++++++++++------ 2 files changed, 54 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 01058243f04..1619c2e1ef6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -390,7 +390,8 @@ public: void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); void emit_if_gen6(ir_if *ir); - void emit_block_move(ir_assignment *ir); + void emit_block_move(dst_reg *dst, src_reg *src, + const struct glsl_type *type, bool predicated); /** * Emit the correct dot-product instruction for the type of arguments diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 27620c47085..4f7763d61bb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1269,38 +1269,69 @@ 
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) } void -vec4_visitor::emit_block_move(ir_assignment *ir) +vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, + const struct glsl_type *type, bool predicated) { - ir->rhs->accept(this); - src_reg src = this->result; - - dst_reg dst = get_assignment_lhs(ir->lhs, this); - - if (ir->condition) { - emit_bool_to_cond_code(ir->condition); + if (type->base_type == GLSL_TYPE_STRUCT) { + for (unsigned int i = 0; i < type->length; i++) { + emit_block_move(dst, src, type->fields.structure[i].type, predicated); + } + return; } - /* FINISHME: This should really set to the correct maximal writemask for each - * FINISHME: component written (in the loops below). - */ - dst.writemask = WRITEMASK_XYZW; - - for (int i = 0; i < type_size(ir->lhs->type); i++) { - vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); - if (ir->condition) - inst->predicate = BRW_PREDICATE_NORMAL; - - dst.reg_offset++; - src.reg_offset++; + if (type->is_array()) { + for (unsigned int i = 0; i < type->length; i++) { + emit_block_move(dst, src, type->fields.array, predicated); + } + return; } + + if (type->is_matrix()) { + const struct glsl_type *vec_type; + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, 1); + + for (int i = 0; i < type->matrix_columns; i++) { + emit_block_move(dst, src, vec_type, predicated); + } + return; + } + + assert(type->is_scalar() || type->is_vector()); + + dst->type = brw_type_for_base_type(type); + src->type = dst->type; + + dst->writemask = (1 << type->vector_elements) - 1; + + /* Do we need to worry about swizzling a swizzle? 
*/ + assert(src->swizzle = BRW_SWIZZLE_NOOP); + src->swizzle = swizzle_for_size(type->vector_elements); + + vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src); + if (predicated) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst->reg_offset++; + src->reg_offset++; } void vec4_visitor::visit(ir_assignment *ir) { + dst_reg dst = get_assignment_lhs(ir->lhs, this); + if (!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()) { - emit_block_move(ir); + ir->rhs->accept(this); + src_reg src = this->result; + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL); return; } @@ -1310,8 +1341,6 @@ vec4_visitor::visit(ir_assignment *ir) ir->rhs->accept(this); src_reg src = this->result; - dst_reg dst = get_assignment_lhs(ir->lhs, this); - int swizzles[4]; int first_enabled_chan = 0; int src_chan = 0; From aba9801996f2f524a765df378c234a7645b3a5d1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 20:54:25 -0700 Subject: [PATCH 375/600] i965/vs: Fix types of varying outputs. For structs/arrays/matrices, they were ending up as uint because we forgot to set them. All varyings in GLSL 1.20 are of base type float, so just force the matter here (which gets inherited at emit_urb_writes() time). Fixes vs-varying-array-mat2-col-rd. 
--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 4f7763d61bb..5e2b3e5a5fe 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -646,6 +646,7 @@ vec4_visitor::visit(ir_variable *ir) for (int i = 0; i < type_size(ir->type); i++) { output_reg[ir->location + i] = *reg; output_reg[ir->location + i].reg_offset = i; + output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F; } break; From 31ef2e3ec2f5837eea0899b4bda5ea15e335a6a2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 21:22:36 -0700 Subject: [PATCH 376/600] i965/vs: Avoid generating extra moves when setting up large ir_constants. We were also screwing up the types in the process, and just not emitting moves was easier. --- src/mesa/drivers/dri/i965/brw_vec4.h | 2 + .../drivers/dri/i965/brw_vec4_visitor.cpp | 77 +++++++------------ 2 files changed, 28 insertions(+), 51 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 1619c2e1ef6..3e457fc61aa 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -393,6 +393,8 @@ public: void emit_block_move(dst_reg *dst, src_reg *src, const struct glsl_type *type, bool predicated); + void emit_constant_values(dst_reg *dst, ir_constant *value); + /** * Emit the correct dot-product instruction for the type of arguments */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 5e2b3e5a5fe..3562779413f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1387,96 +1387,71 @@ vec4_visitor::visit(ir_assignment *ir) } } - void -vec4_visitor::visit(ir_constant *ir) +vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) { if 
(ir->type->base_type == GLSL_TYPE_STRUCT) { - src_reg temp_base = src_reg(this, ir->type); - dst_reg temp = dst_reg(temp_base); + foreach_list(node, &ir->components) { + ir_constant *field_value = (ir_constant *)node; - foreach_iter(exec_list_iterator, iter, ir->components) { - ir_constant *field_value = (ir_constant *)iter.get(); - int size = type_size(field_value->type); - - assert(size > 0); - - field_value->accept(this); - src_reg src = this->result; - - for (int i = 0; i < (unsigned int)size; i++) { - emit(BRW_OPCODE_MOV, temp, src); - - src.reg_offset++; - temp.reg_offset++; - } + emit_constant_values(dst, field_value); } - this->result = temp_base; return; } if (ir->type->is_array()) { - src_reg temp_base = src_reg(this, ir->type); - dst_reg temp = dst_reg(temp_base); - int size = type_size(ir->type->fields.array); - - assert(size > 0); - for (unsigned int i = 0; i < ir->type->length; i++) { - ir->array_elements[i]->accept(this); - src_reg src = this->result; - for (int j = 0; j < size; j++) { - emit(BRW_OPCODE_MOV, temp, src); - - src.reg_offset++; - temp.reg_offset++; - } + emit_constant_values(dst, ir->array_elements[i]); } - this->result = temp_base; return; } if (ir->type->is_matrix()) { - this->result = src_reg(this, ir->type); - dst_reg dst = dst_reg(this->result); - - assert(ir->type->base_type == GLSL_TYPE_FLOAT); - for (int i = 0; i < ir->type->matrix_columns; i++) { for (int j = 0; j < ir->type->vector_elements; j++) { - dst.writemask = 1 << j; - emit(BRW_OPCODE_MOV, dst, + dst->writemask = 1 << j; + dst->type = BRW_REGISTER_TYPE_F; + + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i * ir->type->vector_elements + j])); } - dst.reg_offset++; + dst->reg_offset++; } return; } - this->result = src_reg(this, ir->type); - dst_reg dst = dst_reg(this->result); - for (int i = 0; i < ir->type->vector_elements; i++) { - dst.writemask = 1 << i; + dst->writemask = 1 << i; + dst->type = brw_type_for_base_type(ir->type); switch (ir->type->base_type) { case 
GLSL_TYPE_FLOAT: - emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i])); + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i])); break; case GLSL_TYPE_INT: - emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i])); + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i])); break; case GLSL_TYPE_UINT: - emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i])); + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i])); break; case GLSL_TYPE_BOOL: - emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i])); + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i])); break; default: assert(!"Non-float/uint/int/bool constant"); break; } } + dst->reg_offset++; +} + +void +vec4_visitor::visit(ir_constant *ir) +{ + dst_reg dst = dst_reg(this, ir->type); + this->result = src_reg(dst); + + emit_constant_values(&dst, ir); } void From 160a5a3ff0fc826a2978c6bea6de21b445f612e9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 21:53:00 -0700 Subject: [PATCH 377/600] i965/vs: Add support for VUEs larger than a single URB write. Fixes glsl-max-varyings. --- .../drivers/dri/i965/brw_vec4_visitor.cpp | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3562779413f..f90025c8e7e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1703,6 +1703,7 @@ vec4_visitor::emit_urb_writes() int base_mrf = 1; int mrf = base_mrf; int urb_entry_size; + uint64_t outputs_remaining = c->prog_data.outputs_written; /* FINISHME: edgeflag */ @@ -1717,11 +1718,14 @@ vec4_visitor::emit_urb_writes() mrf = emit_vue_header_gen4(mrf); } + /* Set up the VUE data for the first URB write */ int attr; for (attr = 0; attr < VERT_RESULT_MAX; attr++) { if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) continue; + outputs_remaining &= ~BITFIELD64_BIT(attr); + /* This is set up in the VUE header. 
*/ if (attr == VERT_RESULT_HPOS) continue; @@ -1734,27 +1738,49 @@ vec4_visitor::emit_urb_writes() emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); - /* If this is MRF 15, we can't fit anything more into this URB + /* If this was MRF 15, we can't fit anything more into this URB * WRITE. Note that base_mrf of 1 means that MRF 15 is an * even-numbered amount of URB write data, which will meet * gen6's requirements for length alignment. */ - if (mrf == 15) + if (mrf == 16) { + attr++; break; + } } vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); inst->base_mrf = base_mrf; inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); - inst->eot = true; + inst->eot = !outputs_remaining; urb_entry_size = mrf - base_mrf; - for (; attr < VERT_RESULT_MAX; attr++) { - if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) - continue; - fail("Second URB write not supported.\n"); - break; + /* Optional second URB write */ + if (outputs_remaining) { + mrf = base_mrf + 1; + + for (; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + + assert(mrf != 16); + } + + inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = true; + /* URB destination offset. In the previous write, we got MRFs 2- + * 15 MRFs minus the one header MRF, so 14 regs. URB offset is in + * URB row increments, and each of our MRFs is half of one of + * those, since we're doing interleaved writes. + */ + inst->offset = 14 / 2; + + urb_entry_size += mrf - base_mrf; } if (intel->gen == 6) From e355b179b2bd42a585464f17759764083fa3ef26 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 10:43:49 -0700 Subject: [PATCH 378/600] i965: Remove dead brw->wm.max_threads field. 
--- src/mesa/drivers/dri/i965/brw_context.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4a1abd6252e..38b13098bc0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -735,7 +735,6 @@ struct brw_context GLuint render_surf; GLuint nr_surfaces; - GLuint max_threads; drm_intel_bo *scratch_bo; GLuint sampler_count; From 2b224d66a01f3ce867fb05558b25749705bbfe7a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 10:44:15 -0700 Subject: [PATCH 379/600] i965: Set up allocation of a VS scratch space if required. --- src/mesa/drivers/dri/i965/brw_context.h | 6 ++++++ src/mesa/drivers/dri/i965/brw_program.c | 28 +++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vs.c | 9 ++++++++ src/mesa/drivers/dri/i965/brw_vs.h | 1 + src/mesa/drivers/dri/i965/brw_wm.c | 25 +++------------------- 5 files changed, 47 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 38b13098bc0..add8c568795 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -312,6 +312,7 @@ struct brw_vs_prog_data { GLuint total_grf; GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ + GLuint total_scratch; GLuint inputs_read; @@ -671,6 +672,7 @@ struct brw_context struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ + drm_intel_bo *scratch_bo; drm_intel_bo *const_bo; /** Offset in the program cache to the VS program */ uint32_t prog_offset; @@ -858,6 +860,10 @@ void brw_validate_textures( struct brw_context *brw ); */ void brwInitFragProgFuncs( struct dd_function_table *functions ); +int brw_get_scratch_size(int size); +void brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size); + /* brw_urb.c */ diff --git 
a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 6674f1640c8..09b5be4c96e 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -226,6 +226,34 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx, return GL_TRUE; } +/* Per-thread scratch space is a power-of-two multiple of 1KB. */ +int +brw_get_scratch_size(int size) +{ + int i; + + for (i = 1024; i < size; i *= 2) + ; + + return i; +} + +void +brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size) +{ + drm_intel_bo *old_bo = *scratch_bo; + + if (old_bo && old_bo->size < size) { + drm_intel_bo_unreference(old_bo); + old_bo = NULL; + } + + if (!old_bo) { + *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096); + } +} + void brwInitFragProgFuncs( struct dd_function_table *functions ) { assert(functions->ProgramStringNotify == _tnl_program_string); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index bd0677db151..d389f602fba 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -45,6 +45,7 @@ static void do_vs_prog( struct brw_context *brw, struct brw_vs_prog_key *key ) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; GLuint program_size; const GLuint *program; struct brw_vs_compile c; @@ -97,6 +98,14 @@ static void do_vs_prog( struct brw_context *brw, brw_old_vs_emit(&c); } + /* Scratch space is used for register spilling */ + if (c.last_scratch) { + c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch); + + brw_get_scratch_bo(intel, &brw->vs.scratch_bo, + c.prog_data.total_scratch * brw->vs_max_threads); + } + /* get the program */ program = brw_get_program(&c.func, &program_size); diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 9f9fed33970..83a37f5b800 100644 --- 
a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -66,6 +66,7 @@ struct brw_vs_compile { GLuint first_output; GLuint nr_outputs; GLuint first_overflow_output; /**< VERT_ATTRIB_x */ + GLuint last_scratch; GLuint first_tmp; GLuint last_tmp; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index d13ac6124c8..a4524fc7889 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -244,29 +244,10 @@ bool do_wm_prog(struct brw_context *brw, /* Scratch space is used for register spilling */ if (c->last_scratch) { - uint32_t total_scratch; + c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch); - /* Per-thread scratch space is power-of-two sized. */ - for (c->prog_data.total_scratch = 1024; - c->prog_data.total_scratch <= c->last_scratch; - c->prog_data.total_scratch *= 2) { - /* empty */ - } - total_scratch = c->prog_data.total_scratch * brw->wm_max_threads; - - if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) { - drm_intel_bo_unreference(brw->wm.scratch_bo); - brw->wm.scratch_bo = NULL; - } - if (brw->wm.scratch_bo == NULL) { - brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, - "wm scratch", - total_scratch, - 4096); - } - } - else { - c->prog_data.total_scratch = 0; + brw_get_scratch_bo(intel, &brw->vs.scratch_bo, + c->prog_data.total_scratch * brw->wm_max_threads); } if (unlikely(INTEL_DEBUG & DEBUG_WM)) From 314c2574ff6e562a6cfc5fb84980f092e495a948 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 10:47:54 -0700 Subject: [PATCH 380/600] i965: Add remaining scratch space setup emit to unit states. 
--- src/mesa/drivers/dri/i965/brw_vs_state.c | 10 ++++++++++ src/mesa/drivers/dri/i965/gen6_vs_state.c | 10 +++++++++- src/mesa/drivers/dri/i965/gen7_vs_state.c | 10 +++++++++- src/mesa/drivers/dri/i965/gen7_wm_state.c | 8 +++++++- 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index fc4373ab311..29b3e47ab0c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -77,6 +77,16 @@ brw_prepare_vs_unit(struct brw_context *brw) else vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces; + if (brw->vs.prog_data->total_scratch != 0) { + vs->thread2.scratch_space_base_pointer = + brw->vs.scratch_bo->offset >> 10; /* reloc */ + vs->thread2.per_thread_scratch_space = + ffs(brw->vs.prog_data->total_scratch) - 11; + } else { + vs->thread2.scratch_space_base_pointer = 0; + vs->thread2.per_thread_scratch_space = 0; + } + vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; vs->thread3.dispatch_grf_start_reg = 1; diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index affa72c7324..b94121e8437 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -160,7 +160,15 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ + + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << 
GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 0fad3d2fb68..f3cd5d15bf0 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -71,7 +71,15 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ + + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 1d80e96778e..55a603e887a 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -228,7 +228,13 @@ upload_ps_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); - OUT_BATCH(0); /* scratch space base offset */ + if (brw->wm.prog_data->total_scratch) { + OUT_RELOC(brw->wm.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->wm.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(0); /* kernel 1 pointer */ From 1ff4f11dd94711a498cde0330101c58636ef2741 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 10:59:39 -0700 Subject: [PATCH 381/600] i965/vs: Track the variable index of array accesses. This isn't used currently, as we lower all array accesses. 
--- src/mesa/drivers/dri/i965/brw_vec4.h | 4 ++++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 3e457fc61aa..bb40c71e4c9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -169,6 +169,8 @@ public: GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */ bool negate; bool abs; + + src_reg *reladdr; }; class dst_reg : public reg @@ -219,6 +221,8 @@ public: explicit dst_reg(src_reg reg); int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ + + src_reg *reladdr; }; class vec4_instruction : public exec_node { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f90025c8e7e..8bd048ff459 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -37,6 +37,7 @@ src_reg::src_reg(dst_reg reg) this->reg = reg.reg; this->reg_offset = reg.reg_offset; this->type = reg.type; + this->reladdr = reg.reladdr; int swizzles[4]; int next_chan = 0; @@ -66,6 +67,7 @@ dst_reg::dst_reg(src_reg reg) this->reg_offset = reg.reg_offset; this->type = reg.type; this->writemask = WRITEMASK_XYZW; + this->reladdr = reg.reladdr; } vec4_instruction * @@ -1186,7 +1188,6 @@ vec4_visitor::visit(ir_dereference_array *ir) if (constant_index) { src.reg_offset += constant_index->value.i[0] * element_size; } else { -#if 0 /* Variable array index */ /* Variable index array dereference. It eats the "vec4" of the * base of the array and an index that offsets the Mesa register * index. 
@@ -1198,15 +1199,22 @@ vec4_visitor::visit(ir_dereference_array *ir) if (element_size == 1) { index_reg = this->result; } else { - index_reg = src_reg(this, glsl_type::float_type); + index_reg = src_reg(this, glsl_type::int_type); emit(BRW_OPCODE_MUL, dst_reg(index_reg), - this->result, src_reg_for_float(element_size)); + this->result, src_reg(element_size)); + } + + if (src.reladdr) { + src_reg temp = src_reg(this, glsl_type::int_type); + + emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg); + + index_reg = temp; } src.reladdr = ralloc(mem_ctx, src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); -#endif } /* If the type is smaller than a vec4, replicate the last channel out. */ From 758c3c2b4588f235def48b2f28c0479a70f7c194 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 15:21:25 -0700 Subject: [PATCH 382/600] i965/vs: Reserve MRF 14/15 for array loads/register unspilling. --- .../drivers/dri/i965/brw_vec4_visitor.cpp | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 8bd048ff459..e01318af1ab 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1708,10 +1708,18 @@ align_interleaved_urb_mlen(struct brw_context *brw, int mlen) void vec4_visitor::emit_urb_writes() { + /* MRF 0 is reserved for the debugger, so start with message header + * in MRF 1. + */ int base_mrf = 1; int mrf = base_mrf; int urb_entry_size; uint64_t outputs_remaining = c->prog_data.outputs_written; + /* In the process of generating our URB write message contents, we + * may need to unspill a register or load from an array. Those + * reads would use MRFs 14-15. 
+ */ + int max_usable_mrf = 13; /* FINISHME: edgeflag */ @@ -1751,7 +1759,7 @@ vec4_visitor::emit_urb_writes() * even-numbered amount of URB write data, which will meet * gen6's requirements for length alignment. */ - if (mrf == 16) { + if (mrf > max_usable_mrf) { attr++; break; } @@ -1772,21 +1780,21 @@ vec4_visitor::emit_urb_writes() if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) continue; - emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + assert(mrf < max_usable_mrf); - assert(mrf != 16); + emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); } inst = emit(VS_OPCODE_URB_WRITE); inst->base_mrf = base_mrf; inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); inst->eot = true; - /* URB destination offset. In the previous write, we got MRFs 2- - * 15 MRFs minus the one header MRF, so 14 regs. URB offset is in + /* URB destination offset. In the previous write, we got MRFs + * 2-13 minus the one header MRF, so 12 regs. URB offset is in * URB row increments, and each of our MRFs is half of one of * those, since we're doing interleaved writes. */ - inst->offset = 14 / 2; + inst->offset = (max_usable_mrf - base_mrf) / 2; urb_entry_size += mrf - base_mrf; } From d0e4d71070cd7fa197ed98612782484ec1f27123 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 12:15:26 -0700 Subject: [PATCH 383/600] i965/vs: Move virtual GRFs with array accesses to them to scratch space. 
--- src/mesa/drivers/dri/i965/brw_defines.h | 2 + src/mesa/drivers/dri/i965/brw_vec4.h | 12 ++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 10 +- .../drivers/dri/i965/brw_vec4_visitor.cpp | 163 ++++++++++++++++++ 4 files changed, 186 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index e3823c65d1a..b740d87c933 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -640,6 +640,8 @@ enum opcode { FS_OPCODE_PULL_CONSTANT_LOAD, VS_OPCODE_URB_WRITE, + VS_OPCODE_SCRATCH_READ, + VS_OPCODE_SCRATCH_WRITE, }; #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index bb40c71e4c9..2f171b72049 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -376,6 +376,7 @@ public: void setup_payload(); void reg_allocate_trivial(); void reg_allocate(); + void move_grf_array_access_to_scratch(); vec4_instruction *emit(enum opcode opcode); @@ -424,6 +425,17 @@ public: int emit_vue_header_gen4(int header_mrf); void emit_urb_writes(void); + src_reg get_scratch_offset(vec4_instruction *inst, + src_reg *reladdr, int reg_offset); + void emit_scratch_read(vec4_instruction *inst, + dst_reg dst, + src_reg orig_src, + int base_offset); + void emit_scratch_write(vec4_instruction *inst, + src_reg temp, + dst_reg orig_dst, + int base_offset); + GLboolean try_emit_sat(ir_expression *ir); bool process_move_condition(ir_rvalue *ir); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index bc3110b0458..57eb467567e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -321,7 +321,7 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, bool vec4_visitor::run() { - /* Generate FS IR for main(). (the visitor only descends into + /* Generate VS IR for main(). 
(the visitor only descends into * functions called "main"). */ foreach_iter(exec_list_iterator, iter, *shader->ir) { @@ -332,6 +332,14 @@ vec4_visitor::run() emit_urb_writes(); + /* Before any optimization, push array accesses out to scratch + * space where we need them to be. This pass may allocate new + * virtual GRFs, so we want to do it early. It also makes sure + * that we have reladdr computations available for CSE, since we'll + * often do repeated subexpressions for those. + */ + move_grf_array_access_to_scratch(); + if (failed) return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index e01318af1ab..049af6c3992 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1805,6 +1805,169 @@ vec4_visitor::emit_urb_writes() c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; } +src_reg +vec4_visitor::get_scratch_offset(vec4_instruction *inst, + src_reg *reladdr, int reg_offset) +{ + /* Because we store the values to scratch interleaved like our + * vertex data, we need to scale the vec4 index by 2. + */ + int message_header_scale = 2; + + /* Pre-gen6, the message header uses byte offsets instead of vec4 + * (16-byte) offset units. + */ + if (intel->gen < 6) + message_header_scale *= 16; + + if (reladdr) { + src_reg index = src_reg(this, glsl_type::int_type); + + vec4_instruction *add = emit(BRW_OPCODE_ADD, + dst_reg(index), + *reladdr, + src_reg(reg_offset)); + /* Move our new instruction from the tail to its correct place. 
*/ + add->remove(); + inst->insert_before(add); + + vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index), + index, src_reg(message_header_scale)); + mul->remove(); + inst->insert_before(mul); + + return index; + } else { + return src_reg(reg_offset * message_header_scale); + } +} + +/** + * Emits an instruction before @inst to load the value named by @orig_src + * from scratch space at @base_offset to @temp. + */ +void +vec4_visitor::emit_scratch_read(vec4_instruction *inst, + dst_reg temp, src_reg orig_src, + int base_offset) +{ + int reg_offset = base_offset + orig_src.reg_offset; + src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset); + + vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ, + temp, index); + + scratch_read_inst->base_mrf = 14; + scratch_read_inst->mlen = 1; + /* Move our instruction from the tail to its correct place. */ + scratch_read_inst->remove(); + inst->insert_before(scratch_read_inst); +} + +/** + * Emits an instruction after @inst to store the value to be written + * to @orig_dst to scratch space at @base_offset, from @temp. + */ +void +vec4_visitor::emit_scratch_write(vec4_instruction *inst, + src_reg temp, dst_reg orig_dst, + int base_offset) +{ + int reg_offset = base_offset + orig_dst.reg_offset; + src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset); + + dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), + orig_dst.writemask)); + vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE, + dst, temp, index); + scratch_write_inst->base_mrf = 13; + scratch_write_inst->mlen = 2; + scratch_write_inst->predicate = inst->predicate; + /* Move our instruction from the tail to its correct place. */ + scratch_write_inst->remove(); + inst->insert_after(scratch_write_inst); +} + +/** + * We can't generally support array access in GRF space, because a + * single instruction's destination can only span 2 contiguous + * registers. 
So, we send all GRF arrays that get variable index + * access to scratch space. + */ +void +vec4_visitor::move_grf_array_access_to_scratch() +{ + int scratch_loc[this->virtual_grf_count]; + + for (int i = 0; i < this->virtual_grf_count; i++) { + scratch_loc[i] = -1; + } + + /* First, calculate the set of virtual GRFs that need to be punted + * to scratch due to having any array access on them, and where in + * scratch. + */ + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->dst.file == GRF && inst->dst.reladdr && + scratch_loc[inst->dst.reg] == -1) { + scratch_loc[inst->dst.reg] = c->last_scratch; + c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4; + } + + for (int i = 0 ; i < 3; i++) { + src_reg *src = &inst->src[i]; + + if (src->file == GRF && src->reladdr && + scratch_loc[src->reg] == -1) { + scratch_loc[src->reg] = c->last_scratch; + c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4; + } + } + } + + /* Now, for anything that will be accessed through scratch, rewrite + * it to load/store. Note that this is a _safe list walk, because + * we may generate a new scratch_write instruction after the one + * we're processing. + */ + foreach_list_safe(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + /* Set up the annotation tracking for new generated instructions. 
*/ + base_ir = inst->ir; + current_annotation = inst->annotation; + + if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { + src_reg temp = src_reg(this, glsl_type::vec4_type); + + emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]); + + inst->dst.file = temp.file; + inst->dst.reg = temp.reg; + inst->dst.reg_offset = temp.reg_offset; + inst->dst.reladdr = NULL; + } + + for (int i = 0 ; i < 3; i++) { + if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) + continue; + + dst_reg temp = dst_reg(this, glsl_type::vec4_type); + + emit_scratch_read(inst, temp, inst->src[i], + scratch_loc[inst->src[i].reg]); + + inst->src[i].file = temp.file; + inst->src[i].reg = temp.reg; + inst->src[i].reg_offset = temp.reg_offset; + inst->src[i].reladdr = NULL; + } + } +} + + vec4_visitor::vec4_visitor(struct brw_vs_compile *c, struct gl_shader_program *prog, struct brw_shader *shader) From 0f22f98ccd69bb5e8df3c78203bce9bc630965c1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 13:16:06 -0700 Subject: [PATCH 384/600] i965: Make some EU emit code for DP read/write messages non-static. We keep building these strange interfaces for DP read/write where there's a helper function with some partially-specific, partially-general controls, which is used in exactly one place in code generation. Making these public will let us set up those instructions in the one place they're to be generated. 
--- src/mesa/drivers/dri/i965/brw_eu.h | 27 +++++++++++++++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 44 ++++++++++++------------- 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 38dd99b693d..af50305fc2b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -801,6 +801,12 @@ void brw_init_compile(struct brw_context *, struct brw_compile *p, void *mem_ctx); const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); +struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode); +void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest); +void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg); + /* Helpers for regular instructions: */ @@ -855,6 +861,27 @@ ROUND(RNDE) /* Helpers for SEND instruction: */ +void brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + GLuint response_length); + +void brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg); + void brw_urb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index e7370f36064..b08906426e4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -89,9 +89,9 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) } -static void brw_set_dest(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg dest) +void 
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) @@ -221,9 +221,9 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) /* 10. Check destination issues. */ } -static void brw_set_src0(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg reg) +void +brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg) { if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); @@ -504,17 +504,18 @@ static void brw_set_urb_message( struct brw_compile *p, } } -static void brw_set_dp_write_message( struct brw_compile *p, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint msg_control, - GLuint msg_type, - GLuint msg_length, - GLboolean header_present, - GLuint pixel_scoreboard_clear, - GLuint response_length, - GLuint end_of_thread, - GLuint send_commit_msg) +void +brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg) { struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; @@ -570,7 +571,7 @@ static void brw_set_dp_write_message( struct brw_compile *p, } } -static void +void brw_set_dp_read_message(struct brw_compile *p, struct brw_instruction *insn, GLuint binding_table_index, @@ -709,9 +710,9 @@ static void brw_set_sampler_message(struct brw_compile *p, } - -static struct brw_instruction *next_insn( struct brw_compile *p, - GLuint opcode ) +#define next_insn brw_next_insn +struct brw_instruction * +brw_next_insn(struct brw_compile *p, GLuint opcode) { struct brw_instruction *insn; @@ -732,7 +733,6 @@ static struct brw_instruction *next_insn( struct brw_compile *p, return insn; } - 
static struct brw_instruction *brw_alu1( struct brw_compile *p, GLuint opcode, struct brw_reg dest, From 584ff407482fd3baf5ce081dbbf9653eb76c40f1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 13:36:11 -0700 Subject: [PATCH 385/600] i965/vs: Add support for scratch read/write codegen. --- src/mesa/drivers/dri/i965/brw_vec4.h | 9 ++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 144 +++++++++++++++++++- 2 files changed, 151 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 2f171b72049..b5f442e6d21 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -451,6 +451,15 @@ public: struct brw_reg dst, struct brw_reg src); void generate_urb_write(vec4_instruction *inst); + void generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index); + void generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index); + void generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index); }; } /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 57eb467567e..21830f99fc2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -1,5 +1,4 @@ -/* - * Copyright © 2011 Intel Corporation +/* Copyright © 2011 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -278,6 +277,139 @@ vec4_visitor::generate_urb_write(vec4_instruction *inst) BRW_URB_SWIZZLE_INTERLEAVE); } +void +vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index) +{ + int second_vertex_offset; + + if (intel->gen >= 6) + second_vertex_offset = 1; + else + second_vertex_offset = 16; + + m1 = retype(m1, 
BRW_REGISTER_TYPE_D); + + /* Set up M1 (message payload). Only the block offsets in M1.0 and + * M1.4 are used, and the rest are ignored. + */ + struct brw_reg m1_0 = suboffset(vec1(m1), 0); + struct brw_reg m1_4 = suboffset(vec1(m1), 4); + struct brw_reg index_0 = suboffset(vec1(index), 0); + struct brw_reg index_4 = suboffset(vec1(index), 4); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MOV(p, m1_0, index_0); + + brw_set_predicate_inverse(p, true); + if (index.file == BRW_IMMEDIATE_VALUE) { + index_4.dw1.ud++; + brw_MOV(p, m1_4, index_4); + } else { + brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset)); + } + + brw_pop_insn_state(p); +} + +void +vec4_visitor::generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index) +{ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); + } + + generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + index); + + uint32_t msg_type; + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else if (intel->gen == 5 || intel->is_g4x) + msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else + msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + + /* Each of the 8 channel enables is considered for whether each + * dword is written. 
+ */ + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); + brw_set_dp_read_message(p, send, + 255, /* binding table index: stateless access */ + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, + 2, /* mlen */ + 1 /* rlen */); +} + +void +vec4_visitor::generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index) +{ + /* If the instruction is predicated, we'll predicate the send, not + * the header setup. + */ + brw_set_predicate_control(p, false); + + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); + } + + generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + index); + + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), + retype(src, BRW_REGISTER_TYPE_D)); + + uint32_t msg_type; + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; + else + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; + + brw_set_predicate_control(p, inst->predicate); + + /* Each of the 8 channel enables is considered for whether each + * dword is written. 
+ */ + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); + brw_set_dp_write_message(p, send, + 255, /* binding table index: stateless access */ + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + 3, /* mlen */ + true, /* header present */ + false, /* pixel scoreboard */ + 0, /* rlen */ + false, /* eot */ + false /* commit */); +} + void vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, struct brw_reg dst, @@ -308,6 +440,14 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, generate_urb_write(inst); break; + case VS_OPCODE_SCRATCH_READ: + generate_scratch_read(inst, dst, src[0]); + break; + + case VS_OPCODE_SCRATCH_WRITE: + generate_scratch_write(inst, dst, src[0], src[1]); + break; + default: if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { fail("unsupported opcode in `%s' in VS\n", From 54fa706d6f06955221cb6b452b5b170bfaaceef4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 13:38:50 -0700 Subject: [PATCH 386/600] i965/vs: Enable variable array indexing in the VS. 
--- src/mesa/drivers/dri/i965/brw_shader.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 2eeeec25cac..2dc32c95610 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -111,12 +111,14 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) brw_do_cubemap_normalize(shader->ir); lower_noise(shader->ir); lower_quadop_vector(shader->ir, false); + + bool input = true; + bool output = stage == MESA_SHADER_FRAGMENT; + bool temp = stage == MESA_SHADER_FRAGMENT; + bool uniform = true; + lower_variable_index_to_cond_assign(shader->ir, - GL_TRUE, /* input */ - GL_TRUE, /* output */ - GL_TRUE, /* temp */ - GL_TRUE /* uniform */ - ); + input, output, temp, uniform); do { progress = false; From e94bdbe04a4f0adb73ab92153987f0c9f48814f7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 17:09:12 -0700 Subject: [PATCH 387/600] i965: Add gen6 disassembly for DP render cache messages. 
--- src/mesa/drivers/dri/i965/brw_disasm.c | 49 ++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index af41c848308..927b0b4acc9 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -309,6 +309,35 @@ char *target_function[16] = { [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" }; +char *target_function_gen6[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE] = "sampler", + [GEN6_MESSAGE_TARGET_DP_RENDER_CACHE] = "render", + [GEN6_MESSAGE_TARGET_DP_CONST_CACHE] = "const", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +char *dp_rc_msg_type_gen6[16] = { + [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read", + [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read", + [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read", + [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write", + [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write", + [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write", 
+}; + char *math_function[16] = { [BRW_MATH_FUNCTION_INV] = "inv", [BRW_MATH_FUNCTION_LOG] = "log", @@ -927,8 +956,14 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) newline (file); pad (file, 16); space = 0; - err |= control (file, "target function", target_function, - target, &space); + + if (gen >= 6) { + err |= control (file, "target function", target_function_gen6, + target, &space); + } else { + err |= control (file, "target function", target_function, + target, &space); + } switch (target) { case BRW_MESSAGE_TARGET_MATH: @@ -985,9 +1020,16 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_read.msg_type); } break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: if (gen >= 6) { - format (file, " (%d, %d, %d, %d, %d, %d)", + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen6_dp.msg_type, &space); + + format (file, ", %d, %d, %d, %d, %d, %d)", inst->bits3.gen6_dp.binding_table_index, inst->bits3.gen6_dp.msg_control, inst->bits3.gen6_dp.msg_type, @@ -1003,6 +1045,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_write.send_commit_msg); } break; + case BRW_MESSAGE_TARGET_URB: if (gen >= 5) { format (file, " %d", inst->bits3.urb_gen5.offset); From 7b91eefe7cbe771397684b5970f7c04313baa2f0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 8 Aug 2011 15:56:11 -0700 Subject: [PATCH 388/600] i965/vs: Slightly improve the trivial reg allocator to skip unused regs. This fixes most of the regressions in the vs array test set from the varying array indexing work, since the giant array that was originally allocated in virtual GRF space never gets used and is only ever read/stored from scratch space. 
--- .../dri/i965/brw_vec4_reg_allocate.cpp | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index e7f6b28a536..1bfd84d76e8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -41,15 +41,37 @@ vec4_visitor::reg_allocate_trivial() { int last_grf = 0; int hw_reg_mapping[this->virtual_grf_count]; + bool virtual_grf_used[this->virtual_grf_count]; int i; int next; + /* Calculate which virtual GRFs are actually in use after whatever + * optimization passes have occurred. + */ + for (int i = 0; i < this->virtual_grf_count; i++) { + virtual_grf_used[i] = false; + } + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + if (inst->dst.file == GRF) + virtual_grf_used[inst->dst.reg] = true; + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) + virtual_grf_used[inst->src[i].reg] = true; + } + } + /* Note that compressed instructions require alignment to 2 registers. */ hw_reg_mapping[0] = this->first_non_payload_grf; next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; for (i = 1; i < this->virtual_grf_count; i++) { - hw_reg_mapping[i] = next; - next += this->virtual_grf_sizes[i]; + if (virtual_grf_used[i]) { + hw_reg_mapping[i] = next; + next += this->virtual_grf_sizes[i]; + } } prog_data->total_grf = next; From 6408b0295f5c8be6fea891a025d79752484721b6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 10:57:09 -0700 Subject: [PATCH 389/600] i965/vs: Fix implementation of ir_unop_any. We were inheriting whatever previous predicate existed. 
--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 049af6c3992..fde1d67759a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -998,7 +998,9 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_unop_any: - emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); From 250770b74d33bb8625c780a74a89477af033d13a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 11:00:28 -0700 Subject: [PATCH 390/600] i965/vs: Respect the gen6 limitation that math opcodes can't be align16. Fixes vs-acos-vec3 and friends. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 9 +++++++ .../drivers/dri/i965/brw_vec4_visitor.cpp | 26 +++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 21830f99fc2..effc82a8004 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -250,6 +250,14 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src) { + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + /* Source swizzles are ignored. 
*/ + assert(!src.abs); + assert(!src.negate); + assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW); + + brw_set_access_mode(p, BRW_ALIGN_1); brw_math(p, dst, brw_math_function(inst->opcode), @@ -258,6 +266,7 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst, src, BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); + brw_set_access_mode(p, BRW_ALIGN_16); } void diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index fde1d67759a..f4756a9a1a8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -129,7 +129,18 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) src_reg temp_src = src_reg(this, glsl_type::vec4_type); emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); - emit(opcode, dst, temp_src); + if (dst.writemask != WRITEMASK_XYZW) { + /* The gen6 math instruction must be align1, so we can't do + * writemasks. + */ + dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + + emit(opcode, temp_dst, temp_src); + + emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + } else { + emit(opcode, dst, temp_src); + } } void @@ -184,7 +195,18 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode, emit(BRW_OPCODE_MOV, dst, src1); src1 = expanded; - emit(opcode, dst, src0, src1); + if (dst.writemask != WRITEMASK_XYZW) { + /* The gen6 math instruction must be align1, so we can't do + * writemasks. + */ + dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + + emit(opcode, temp_dst, src0, src1); + + emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + } else { + emit(opcode, dst, src0, src1); + } } void From abf843a797876b5e3c5c91dbec25b6553d2cc281 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 12:30:41 -0700 Subject: [PATCH 391/600] i965/vs: Add support for ir_binop_pow. Fixes vs-pow-float-float. 
--- src/mesa/drivers/dri/i965/brw_vec4.h | 10 +++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 63 +++++++++++++++++-- .../drivers/dri/i965/brw_vec4_visitor.cpp | 4 +- 3 files changed, 70 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index b5f442e6d21..082021513d2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -444,12 +444,22 @@ public: void generate_vs_instruction(vec4_instruction *inst, struct brw_reg dst, struct brw_reg *src); + void generate_math1_gen4(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src); void generate_math1_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src); + void generate_math2_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_math2_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_urb_write(vec4_instruction *inst); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index effc82a8004..df9521cd04e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -245,6 +245,15 @@ vec4_visitor::generate_math1_gen4(vec4_instruction *inst, BRW_MATH_PRECISION_FULL); } +static void +check_gen6_math_src_arg(struct brw_reg src) +{ + /* Source swizzles are ignored. */ + assert(!src.abs); + assert(!src.negate); + assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW); +} + void vec4_visitor::generate_math1_gen6(vec4_instruction *inst, struct brw_reg dst, @@ -252,10 +261,7 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst, { /* Can't do writemask because math can't be align16. */ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); - /* Source swizzles are ignored. 
*/ - assert(!src.abs); - assert(!src.negate); - assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW); + check_gen6_math_src_arg(src); brw_set_access_mode(p, BRW_ALIGN_1); brw_math(p, @@ -269,6 +275,49 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst, brw_set_access_mode(p, BRW_ALIGN_16); } +void +vec4_visitor::generate_math2_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + /* Source swizzles are ignored. */ + check_gen6_math_src_arg(src0); + check_gen6_math_src_arg(src1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math2(p, + dst, + brw_math_function(inst->opcode), + src0, src1); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_math2_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + + brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src0, + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + brw_set_access_mode(p, BRW_ALIGN_16); +} + void vec4_visitor::generate_urb_write(vec4_instruction *inst) { @@ -442,7 +491,11 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, break; case SHADER_OPCODE_POW: - assert(!"finishme"); + if (intel->gen >= 6) { + generate_math2_gen6(inst, dst, src[0], src[1]); + } else { + generate_math2_gen4(inst, dst, src[0], src[1]); + } break; case VS_OPCODE_URB_WRITE: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f4756a9a1a8..f9447d7c391 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -188,11 
+188,11 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode, */ expanded = src_reg(this, glsl_type::vec4_type); - emit(BRW_OPCODE_MOV, dst, src0); + emit(BRW_OPCODE_MOV, dst_reg(expanded), src0); src0 = expanded; expanded = src_reg(this, glsl_type::vec4_type); - emit(BRW_OPCODE_MOV, dst, src1); + emit(BRW_OPCODE_MOV, dst_reg(expanded), src1); src1 = expanded; if (dst.writemask != WRITEMASK_XYZW) { From 0b359e3ea015576d0e75bf5ec19aceef337311a3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 14:35:38 -0700 Subject: [PATCH 392/600] i965/vs: Add support for loops. This is copied from brw_fs.cpp, instead of doing the temporary IR generation that ir_to_mesa does. Fixes glsl-vs-loop and friends. --- .../drivers/dri/i965/brw_vec4_visitor.cpp | 53 ++++++++----------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f9447d7c391..e11ec40cc7b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -700,58 +700,47 @@ vec4_visitor::visit(ir_variable *ir) void vec4_visitor::visit(ir_loop *ir) { - ir_dereference_variable *counter = NULL; - - fail("not yet\n"); + dst_reg counter; /* We don't want debugging output to print the whole body of the * loop as the annotation. 
*/ this->base_ir = NULL; - if (ir->counter != NULL) - counter = new(ir) ir_dereference_variable(ir->counter); + if (ir->counter != NULL) { + this->base_ir = ir->counter; + ir->counter->accept(this); + counter = *(variable_storage(ir->counter)); - if (ir->from != NULL) { - assert(ir->counter != NULL); + if (ir->from != NULL) { + this->base_ir = ir->from; + ir->from->accept(this); - ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); - - a->accept(this); - delete a; + emit(BRW_OPCODE_MOV, counter, this->result); + } } emit(BRW_OPCODE_DO); if (ir->to) { - ir_expression *e = - new(ir) ir_expression(ir->cmp, glsl_type::bool_type, - counter, ir->to); - ir_if *if_stmt = new(ir) ir_if(e); + this->base_ir = ir->to; + ir->to->accept(this); - ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(), + src_reg(counter), this->result); + inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); - if_stmt->then_instructions.push_tail(brk); - - if_stmt->accept(this); - - delete if_stmt; - delete e; - delete brk; + inst = emit(BRW_OPCODE_BREAK); + inst->predicate = BRW_PREDICATE_NORMAL; } visit_instructions(&ir->body_instructions); + if (ir->increment) { - ir_expression *e = - new(ir) ir_expression(ir_binop_add, counter->type, - counter, ir->increment); - - ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); - - a->accept(this); - delete a; - delete e; + this->base_ir = ir->increment; + ir->increment->accept(this); + emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result); } emit(BRW_OPCODE_WHILE); From fea7d34b3545878ce00914f388e1eeebf55f7748 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 14:49:29 -0700 Subject: [PATCH 393/600] i965/vs: Fix builtin uniform setup. I want to intelligently pack them at some point, but for now we have the params set up in groups of 4. Fixes glsl-vs-normalscale. 
--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index e11ec40cc7b..93252f73285 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -443,13 +443,12 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) int last_swiz = -1; for (unsigned int j = 0; j < 4; j++) { int swiz = GET_SWZ(slots[i].swizzle, j); - if (swiz == last_swiz) - break; last_swiz = swiz; c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; - this->uniform_size[this->uniforms]++; + if (swiz <= last_swiz) + this->uniform_size[this->uniforms]++; } this->uniforms++; } From a55fbbc1a2b579aed1e80036367b521ef6928f66 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 15:08:47 -0700 Subject: [PATCH 394/600] i965/vs: Fix access of attribute arrays. By leaving out the column index, we were reading an unallocated attribute on glsl-mat-attribute. 
--- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index df9521cd04e..517a3e3c75b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -52,8 +52,9 @@ vec4_visitor::setup_attributes(int payload_reg) if (inst->src[i].file != ATTR) continue; + int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset]; inst->src[i].file = HW_REG; - inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0); + inst->src[i].fixed_hw_reg = brw_vec8_grf(grf, 0); inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; } } From aed5e353e95f47773864c6e61c506b9ddad0e2e9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 15:19:26 -0700 Subject: [PATCH 395/600] i965/vs: Clamp vertex color outputs when required by ARB_color_buffer_float. Fixes glsl-vs-vertex-color. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 93252f73285..2a1f003b5ce 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1764,7 +1764,16 @@ vec4_visitor::emit_urb_writes() if (attr == VERT_RESULT_PSIZ) continue; - emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), + src_reg(output_reg[attr])); + + if ((attr == VERT_RESULT_COL0 || + attr == VERT_RESULT_COL1 || + attr == VERT_RESULT_BFC0 || + attr == VERT_RESULT_BFC1) && + c->key.clamp_vertex_color) { + inst->saturate = true; + } /* If this was MRF 15, we can't fit anything more into this URB * WRITE. 
Note that base_mrf of 1 means that MRF 15 is an From 072d64121e13ad6bcb9b703090de1ee4a59f7096 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Aug 2011 11:38:42 -0700 Subject: [PATCH 396/600] i965/vs: Add support for GL_FIXED attributes. Fixes arb_es2_compatibility-fixed-type --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 517a3e3c75b..350d544aba3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -42,6 +42,18 @@ vec4_visitor::setup_attributes(int payload_reg) if (prog_data->inputs_read & BITFIELD64_BIT(i)) { attribute_map[i] = payload_reg + nr_attributes; nr_attributes++; + + /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED + * attributes come in as floating point conversions of the + * integer values. + */ + if (c->key.gl_fixed_input_size[i] != 0) { + struct brw_reg reg = brw_vec8_grf(attribute_map[i], 0); + + brw_MUL(p, + brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1), + reg, brw_imm_f(1.0 / 65536.0)); + } } } From 193a9a209d5121e2c20f1d20c61587b1e3d0603d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Aug 2011 14:13:23 -0700 Subject: [PATCH 397/600] i965/vs: Add support for if(any(bvec)) on gen6. 
--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 2a1f003b5ce..d1888579597 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -564,10 +564,6 @@ vec4_visitor::emit_if_gen6(ir_if *ir) assert(expr->get_num_operands() <= 2); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar() || - expr->operation == ir_binop_any_nequal || - expr->operation == ir_binop_all_equal); - expr->operands[i]->accept(this); op[i] = this->result; } @@ -634,6 +630,14 @@ vec4_visitor::emit_if_gen6(ir_if *ir) inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; return; + case ir_unop_any: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + return; + default: assert(!"not reached"); inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); From e8980c61b2932cd4c8791fcc5afdb54fa033c224 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Aug 2011 09:17:18 -0700 Subject: [PATCH 398/600] i965/vs: Fix the trivial register allocator's failure path. 
--- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 +++ src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 350d544aba3..27160fb40d4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -561,6 +561,9 @@ vec4_visitor::run() setup_payload(); reg_allocate(); + if (failed) + return false; + brw_set_access_mode(p, BRW_ALIGN_16); generate_code(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 1bfd84d76e8..d5fd21d99a4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -39,7 +39,6 @@ assign(int *reg_hw_locations, reg *reg) void vec4_visitor::reg_allocate_trivial() { - int last_grf = 0; int hw_reg_mapping[this->virtual_grf_count]; bool virtual_grf_used[this->virtual_grf_count]; int i; @@ -84,9 +83,9 @@ vec4_visitor::reg_allocate_trivial() assign(hw_reg_mapping, &inst->src[2]); } - if (last_grf >= BRW_MAX_GRF) { + if (prog_data->total_grf > BRW_MAX_GRF) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - last_grf, BRW_MAX_GRF); + prog_data->total_grf, BRW_MAX_GRF); } } From d376fa8e84b044ead47586d1b56a10742bcbdac7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Aug 2011 18:40:14 -0700 Subject: [PATCH 399/600] i965: Fix assertion failure on a loop consisting of while (true) { break }. On enabling the precompile step in the VS, we tripped over this assertion failure in glsl-link-bug-30552. 
--- src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index b08906426e4..f5cc09dd49b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2311,7 +2311,7 @@ brw_find_loop_end(struct brw_compile *p, int start) if (insn->header.opcode == BRW_OPCODE_WHILE) { int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count : insn->bits3.break_cont.jip; - if (ip + jip / br < start) + if (ip + jip / br <= start) return ip; } } From 7fbe7fe13359d3f349664410ec73d7bd48824ed6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Aug 2011 09:52:08 -0700 Subject: [PATCH 400/600] i965/vs: Run the shader backend at link time and return compile failures. Link failure is something that shouldn't happen, but we sometimes want it during development. The precompile also allows analysis of shader codegen with shader-db. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 ++--- .../drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vs.c | 51 ++++++++++++++++--- src/mesa/drivers/dri/i965/brw_vs.h | 3 +- 6 files changed, 54 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 693ef0ce31a..b19c6e72fa6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1781,7 +1781,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, fs_visitor v(c, prog, shader); if (!v.run()) { prog->LinkStatus = GL_FALSE; - prog->InfoLog = ralloc_strdup(prog, v.fail_msg); + ralloc_strcat(&prog->InfoLog, v.fail_msg); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 2dc32c95610..3ff6bbaed47 100644 --- 
a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -24,6 +24,7 @@ extern "C" { #include "main/macros.h" #include "brw_context.h" +#include "brw_vs.h" } #include "brw_fs.h" #include "../glsl/ir_optimization.h" @@ -67,6 +68,9 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog) if (!brw_fs_precompile(ctx, prog)) return false; + if (!brw_vs_precompile(ctx, prog)) + return false; + return true; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 27160fb40d4..9ef6ab6de90 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -809,14 +809,8 @@ vec4_visitor::generate_code() extern "C" { bool -brw_vs_emit(struct brw_vs_compile *c) +brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c) { - struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; - struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &intel->ctx; - struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram; - if (!prog) return false; @@ -833,8 +827,8 @@ brw_vs_emit(struct brw_vs_compile *c) vec4_visitor v(c, prog, shader); if (!v.run()) { - /* FINISHME: Cleanly fail, test at link time, etc. 
*/ - assert(!"not reached"); + prog->LinkStatus = GL_FALSE; + ralloc_strcat(&prog->InfoLog, v.fail_msg); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index d1888579597..b1792a8ee16 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2012,7 +2012,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, this->current_annotation = NULL; this->c = c; - this->vp = brw->vertex_program; /* FINISHME: change for precompile */ + this->vp = prog->VertexProgram; this->prog_data = &c->prog_data; this->variable_ht = hash_table_ctor(0, diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index d389f602fba..3373e707d98 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -40,9 +40,11 @@ #include "../glsl/ralloc.h" -static void do_vs_prog( struct brw_context *brw, - struct brw_vertex_program *vp, - struct brw_vs_prog_key *key ) +static bool +do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; @@ -91,9 +93,11 @@ static void do_vs_prog( struct brw_context *brw, if (new_vs == -1) new_vs = getenv("INTEL_NEW_VS") != NULL; - if (new_vs) { - if (!brw_vs_emit(&c)) - brw_old_vs_emit(&c); + if (new_vs && prog) { + if (!brw_vs_emit(prog, &c)) { + ralloc_free(mem_ctx); + return false; + } } else { brw_old_vs_emit(&c); } @@ -130,6 +134,8 @@ static void do_vs_prog( struct brw_context *brw, &c.prog_data, aux_size, &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); + + return true; } @@ -174,13 +180,15 @@ static void brw_upload_vs_prog(struct brw_context *brw) if (!brw_search_cache(&brw->cache, BRW_VS_PROG, &key, sizeof(key), &brw->vs.prog_offset, &brw->vs.prog_data)) { - do_vs_prog(brw, vp, &key); + bool 
success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram, + vp, &key); + + assert(success); } brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + sizeof(*brw->vs.prog_data)); } - /* See brw_vs.c: */ const struct brw_tracked_state brw_vs_prog = { @@ -193,3 +201,30 @@ const struct brw_tracked_state brw_vs_prog = { }, .prepare = brw_upload_vs_prog }; + +bool +brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_vs_prog_key key; + struct gl_vertex_program *vp = prog->VertexProgram; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + uint32_t old_prog_offset = brw->vs.prog_offset; + struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data; + bool success; + + if (!vp) + return true; + + memset(&key, 0, sizeof(key)); + + key.program_string_id = bvp->id; + key.clamp_vertex_color = true; + + success = do_vs_prog(brw, prog, bvp, &key); + + brw->vs.prog_offset = old_prog_offset; + brw->vs.prog_data = old_prog_data; + + return success; +} diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 83a37f5b800..beccb381ee2 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -93,7 +93,8 @@ struct brw_vs_compile { GLboolean needs_stack; }; -bool brw_vs_emit(struct brw_vs_compile *c); +bool brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c); void brw_old_vs_emit(struct brw_vs_compile *c); +bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); #endif From 7642c1de6b65b7dfd9e39904291cc9737cd54b56 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Aug 2011 08:09:10 -0700 Subject: [PATCH 401/600] i965/vs: Avoid generating a MOV for most ir_assignment handling. Removes an average of 11.5% of instructions in 54% of vertex shaders in shader-db. 
--- src/mesa/drivers/dri/i965/brw_vec4.h | 6 ++ .../drivers/dri/i965/brw_vec4_visitor.cpp | 67 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 082021513d2..620b05570a6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -388,6 +388,12 @@ public: vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1, src_reg src2); + bool try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst); + /** Walks an exec_list of ir_instruction and sends it through this visitor. */ void visit_instructions(const exec_list *list); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index b1792a8ee16..ae733810757 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1342,6 +1342,63 @@ vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, src->reg_offset++; } + +/* If the RHS processing resulted in an instruction generating a + * temporary value, and it would be easy to rewrite the instruction to + * generate its result right into the LHS instead, do so. This ends + * up reliably removing instructions where it can be tricky to do so + * later without real UD chain information. + */ +bool +vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst) +{ + /* This could be supported, but it would take more smarts. */ + if (ir->condition) + return false; + + if (pre_rhs_inst == last_rhs_inst) + return false; /* No instructions generated to work with. */ + + /* Make sure the last instruction generated our source reg. 
*/ + if (src.file != GRF || + src.file != last_rhs_inst->dst.file || + src.reg != last_rhs_inst->dst.reg || + src.reg_offset != last_rhs_inst->dst.reg_offset || + src.reladdr || + src.abs || + src.negate || + last_rhs_inst->predicate != BRW_PREDICATE_NONE) + return false; + + /* Check that that last instruction fully initialized the channels + * we want to use, in the order we want to use them. We could + * potentially reswizzle the operands of many instructions so that + * we could handle out of order channels, but don't yet. + */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + if (!(last_rhs_inst->dst.writemask & (1 << i))) + return false; + + if (BRW_GET_SWZ(src.swizzle, i) != i) + return false; + } + } + + /* Success! Rewrite the instruction. */ + last_rhs_inst->dst.file = dst.file; + last_rhs_inst->dst.reg = dst.reg; + last_rhs_inst->dst.reg_offset = dst.reg_offset; + last_rhs_inst->dst.reladdr = dst.reladdr; + last_rhs_inst->dst.writemask &= dst.writemask; + + return true; +} + void vec4_visitor::visit(ir_assignment *ir) { @@ -1363,7 +1420,13 @@ vec4_visitor::visit(ir_assignment *ir) /* Now we're down to just a scalar/vector with writemasks. */ int i; + vec4_instruction *pre_rhs_inst, *last_rhs_inst; + pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + ir->rhs->accept(this); + + last_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + src_reg src = this->result; int swizzles[4]; @@ -1396,6 +1459,10 @@ vec4_visitor::visit(ir_assignment *ir) src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], swizzles[2], swizzles[3]); + if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) { + return; + } + if (ir->condition) { emit_bool_to_cond_code(ir->condition); } From 54e66a0a6327b55f15a7c641ec68da505ff19a35 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Aug 2011 16:27:41 -0700 Subject: [PATCH 402/600] i965/vs: Fix abs/negate handling on attributes. 
Fixes glsl-vs-neg-attribute and glsl-vs-abs-attribute. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 9ef6ab6de90..6b0ae42e0e0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -65,9 +65,16 @@ vec4_visitor::setup_attributes(int payload_reg) continue; int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset]; + + struct brw_reg reg = brw_vec8_grf(grf, 0); + reg.dw1.bits.swizzle = inst->src[i].swizzle; + if (inst->src[i].abs) + reg = brw_abs(reg); + if (inst->src[i].negate) + reg = negate(reg); + inst->src[i].file = HW_REG; - inst->src[i].fixed_hw_reg = brw_vec8_grf(grf, 0); - inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; + inst->src[i].fixed_hw_reg = reg; } } From 905f3d03090c7b86e410959c5640054f5f6894ef Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 12 Aug 2011 05:15:50 -0700 Subject: [PATCH 403/600] i965/vs: Remove remaining use of foreach_iter. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 10 +++------- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 6b0ae42e0e0..fca31b6dec9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -57,8 +57,8 @@ vec4_visitor::setup_attributes(int payload_reg) } } - foreach_iter(exec_list_iterator, iter, this->instructions) { - vec4_instruction *inst = (vec4_instruction *)iter.get(); + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; for (int i = 0; i < 3; i++) { if (inst->src[i].file != ATTR) @@ -546,11 +546,7 @@ vec4_visitor::run() /* Generate VS IR for main(). 
(the visitor only descends into * functions called "main"). */ - foreach_iter(exec_list_iterator, iter, *shader->ir) { - ir_instruction *ir = (ir_instruction *)iter.get(); - base_ir = ir; - ir->accept(this); - } + visit_instructions(shader->ir); emit_urb_writes(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index ae733810757..fc75cc35172 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -234,8 +234,8 @@ vec4_visitor::emit_math(enum opcode opcode, void vec4_visitor::visit_instructions(const exec_list *list) { - foreach_iter(exec_list_iterator, iter, *list) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, list) { + ir_instruction *ir = (ir_instruction *)node; base_ir = ir; ir->accept(this); From d0c595ac8032aa9aed402a513870b8dc92e42903 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 12 Aug 2011 05:28:53 -0700 Subject: [PATCH 404/600] i965/gen6: Force WHILE exec size to 8. We can't just look at the instruction that happens to appear at the start of the loop, because it might be some other exec size and cause us to only loop on the first N channels. We always want 8 in our current code (since 16 doesn't work so we don't do 16-wide fragment in that case). Fixes loop-03.vert, which was triggering the assertions. 
--- src/mesa/drivers/dri/i965/brw_eu_emit.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index f5cc09dd49b..27e81306e9c 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1341,8 +1341,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, brw_set_src1(p, insn, brw_imm_ud(0)); insn->bits3.break_cont.jip = br * (do_insn - insn); - insn->header.execution_size = do_insn->header.execution_size; - assert(insn->header.execution_size == BRW_EXECUTE_8); + insn->header.execution_size = BRW_EXECUTE_8; } else if (intel->gen == 6) { insn = next_insn(p, BRW_OPCODE_WHILE); @@ -1351,8 +1350,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.execution_size = do_insn->header.execution_size; - assert(insn->header.execution_size == BRW_EXECUTE_8); + insn->header.execution_size = BRW_EXECUTE_8; } else { if (p->single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); From 8a649277cb57cc13fb38f8e8daf07e8a2b96223c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 12 Aug 2011 05:32:25 -0700 Subject: [PATCH 405/600] i965/vs: Don't assertion fail on vertex texturing. The linker will reject the program, but we need to survive until then. 
Fixes abort in glsl1-2D Texture lookup with explicit lod (Vertex shader) --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index fc75cc35172..d03fbff27fc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1554,7 +1554,12 @@ vec4_visitor::visit(ir_call *ir) void vec4_visitor::visit(ir_texture *ir) { - assert(!"not reached"); + /* FINISHME: Implement vertex texturing. + * + * With 0 vertex samplers available, the linker will reject + * programs that do vertex texturing, but after our visitor has + * run. + */ } void From feff7c62ce446f4e3bb755a2f40dcbd0e70155e4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Aug 2011 20:13:53 -0700 Subject: [PATCH 406/600] i965/vs: Fix condition code for scalar expression all_equals. Fixes vs-op-eq-bool-bool. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index d03fbff27fc..3ae89dfbc45 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -985,7 +985,7 @@ vec4_visitor::visit(ir_expression *ir) temp.type = op[0].type; inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst->conditional_mod = BRW_CONDITIONAL_Z; emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); } break; From e9a86ae3370948acb1276e80fbbc421d7025db36 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Aug 2011 20:43:42 -0700 Subject: [PATCH 407/600] i965/vs: Fix memory leak of ralloc context for the visitor. 
--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3ae89dfbc45..185a01e05f9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2104,6 +2104,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, vec4_visitor::~vec4_visitor() { + ralloc_free(this->mem_ctx); hash_table_dtor(this->variable_ht); } From 7bf70c29adf175f51d0347d0187aecc0e9bbbcb8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Aug 2011 20:59:24 -0700 Subject: [PATCH 408/600] i965/vs: Add support for conversion of FIXED_HW_REG src_reg to/from dst_reg. This was quietly occurring in some emit code I produced, and failed. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 185a01e05f9..621cb53ff84 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -38,6 +38,7 @@ src_reg::src_reg(dst_reg reg) this->reg_offset = reg.reg_offset; this->type = reg.type; this->reladdr = reg.reladdr; + this->fixed_hw_reg = reg.fixed_hw_reg; int swizzles[4]; int next_chan = 0; @@ -68,6 +69,7 @@ dst_reg::dst_reg(src_reg reg) this->type = reg.type; this->writemask = WRITEMASK_XYZW; this->reladdr = reg.reladdr; + this->fixed_hw_reg = reg.fixed_hw_reg; } vec4_instruction * From 0ddf0f1c3451eef8a7c7f46afca623dc4f7c5af6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Aug 2011 21:02:10 -0700 Subject: [PATCH 409/600] i965/vs: Fix multiplies to actually do 32-bit multiplies. Fixes vs-op-mult-int-int and friends. 
--- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 5 +++++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 +++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index fca31b6dec9..011af6f2d3e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -633,6 +633,11 @@ vec4_visitor::generate_code() case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; + case BRW_OPCODE_MACH: + brw_set_acc_write_control(p, 1); + brw_MACH(p, dst, src[0], src[1]); + brw_set_acc_write_control(p, 0); + break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 621cb53ff84..a60fc5f6ada 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -945,7 +945,23 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_binop_mul: - emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + if (ir->type->is_integer()) { + /* For integer multiplication, the MUL uses the low 16 bits + * of one of the operands (src0 on gen6, src1 on gen7). The + * MACH accumulates in the contribution of the upper 16 bits + * of that operand. + * + * FINISHME: Emit just the MUL if we know an operand is small + * enough. 
+ */ + struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); + + emit(BRW_OPCODE_MUL, acc, op[0], op[1]); + emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]); + emit(BRW_OPCODE_MOV, result_dst, src_reg(acc)); + } else { + emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + } break; case ir_binop_div: assert(!"not reached: should be handled by ir_div_to_mul_rcp"); From eb0ff1a1c0f1978d867c748bf2525f717a56bfce Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 15 Aug 2011 10:58:25 -0700 Subject: [PATCH 410/600] mesa: Remove use of fpu_control.h Remove the inclusion of fpu_control.h from compiler.h. Since Bionic lacks fpu_control.h, this fixes the Android build. Also remove the sole use of the fpu_control bits, which was in debug.c. Those were brianp's debug bits, and he approved of their removal. Reviewed-by: Eric Anholt Signed-off-by: Chad Versace --- src/mesa/main/compiler.h | 3 --- src/mesa/main/debug.c | 11 ----------- 2 files changed, 14 deletions(-) diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index ee7d0b2f880..8ed1c6fa61f 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -45,9 +45,6 @@ #include #include #include -#if defined(__linux__) && defined(__i386__) -#include -#endif #include #include diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c index e7f6be99481..b1fc096f296 100644 --- a/src/mesa/main/debug.c +++ b/src/mesa/main/debug.c @@ -192,17 +192,6 @@ static void add_debug_flags( const char *debug ) if (strstr(debug, "flush")) MESA_DEBUG_FLAGS |= DEBUG_ALWAYS_FLUSH; -#if defined(_FPU_GETCW) && defined(_FPU_SETCW) - if (strstr(debug, "fpexceptions")) { - /* raise FP exceptions */ - fpu_control_t mask; - _FPU_GETCW(mask); - mask &= ~(_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM - | _FPU_MASK_OM | _FPU_MASK_UM); - _FPU_SETCW(mask); - } -#endif - #else (void) debug; #endif From bd064a49f119d126623c0e85702801e4cee62187 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 15 Aug 2011 13:26:21 -0700 
Subject: [PATCH 411/600] mesa: Fix Android build by #ifdef'ing out locale support Bionic does not support locales. This commit #ifdef's out the locale usage in _mesa_strtof(). Signed-off-by: Chad Versace --- src/mesa/main/imports.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 0a572ec225d..8f097195922 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -753,7 +753,8 @@ _mesa_strdup( const char *s ) float _mesa_strtof( const char *s, char **end ) { -#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) +#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \ + !defined(ANDROID) static locale_t loc = NULL; if (!loc) { loc = newlocale(LC_CTYPE_MASK, "C", NULL); From 3c9f172fe801a8e954a40affc38942b628b81bda Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 15 Aug 2011 13:29:15 -0700 Subject: [PATCH 412/600] mesa: Add Android to list of platforms that define fpclassify() This is a fix for the Android build. Signed-off-by: Chad Versace --- src/mesa/main/querymatrix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c index 944ad435f7a..eaedf7cd238 100644 --- a/src/mesa/main/querymatrix.c +++ b/src/mesa/main/querymatrix.c @@ -73,7 +73,7 @@ fpclassify(double x) #elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \ defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) || \ - (defined(__sun) && defined(__GNUC__)) + (defined(__sun) && defined(__GNUC__)) || defined(ANDROID) /* fpclassify is available. 
*/ From 6ad08989d7c10892919ce1cb9c88c4cf8b73e1dc Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sat, 30 Jul 2011 10:48:10 -0700 Subject: [PATCH 413/600] ir_to_mesa: Implement ir_unop_logic_not using 1-x Since our logic values are 0.0 (false) and 1.0 (true), 1.0 - x accurately implements logical not. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1ef609fe15d..f03ea7a95e0 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1135,7 +1135,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0)); + /* Previously 'SEQ dst, src, 0.0' was used for this. However, many + * older GPUs implement SEQ using multiple instructions (i915 uses two + * SGE instructions and a MUL instruction). Since our logic values are + * 0.0 and 1.0, 1-x also implements !x. + */ + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0)); break; case ir_unop_neg: op[0].negate = ~op[0].negate; From 41f8ffe5e07c4f389eb13d17ecf0ff776890e9bc Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sat, 30 Jul 2011 10:49:49 -0700 Subject: [PATCH 414/600] ir_to_mesa: Implement ir_binop_logic_or using an add w/saturate or add w/SLT Logical-or is implemented using addition (followed by clamping to [0,1]) on values of 0.0 and 1.0. Replacing the logical-or operators with addition gives a + b which has a result on the range [0, 2]. Previously a SNE instruction was used to clamp the resulting logic value to [0,1]. In a fragment shader, using a saturate on the add has the same effect. Adding the saturate to the add is free, so (at least) one instruction is saved. In a vertex shader, using an SLT on the negation of the add result has the same effect.
Many older shader architectures do not support the SNE instruction. It must be emulated using two SLT instructions and an ADD. On these architectures, the single SLT saves two instructions. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index f03ea7a95e0..fcd14c89cd7 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1267,11 +1267,28 @@ ir_to_mesa_visitor::visit(ir_expression *ir) emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); break; - case ir_binop_logic_or: - /* This could be a saturated add and skip the SNE. */ - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + case ir_binop_logic_or: { + /* After the addition, the value will be an integer on the + * range [0,2]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *add = + emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + add->saturate = true; + } else { + /* Negating the result of the addition gives values on the range + * [-2, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } break; + } case ir_binop_logic_and: /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". 
*/ From 7f4c65256cc3f4d9f6a214424beabe688a5dd6a2 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sat, 30 Jul 2011 10:45:35 -0700 Subject: [PATCH 415/600] ir_to_mesa: Make ir_to_mesa_visitor::emit_dp return the instruction Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index fcd14c89cd7..60d498bd9e3 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -297,11 +297,11 @@ public: /** * Emit the correct dot-product instruction for the type of arguments */ - void emit_dp(ir_instruction *ir, - dst_reg dst, - src_reg src0, - src_reg src1, - unsigned elements); + ir_to_mesa_instruction * emit_dp(ir_instruction *ir, + dst_reg dst, + src_reg src0, + src_reg src1, + unsigned elements); void emit_scalar(ir_instruction *ir, enum prog_opcode op, dst_reg dst, src_reg src0); @@ -408,7 +408,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } -void +ir_to_mesa_instruction * ir_to_mesa_visitor::emit_dp(ir_instruction *ir, dst_reg dst, src_reg src0, src_reg src1, unsigned elements) @@ -417,7 +417,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir, OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 }; - emit(ir, dot_opcodes[elements - 2], dst, src0, src1); + return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** From 92ca560d68e8a6b532998707afcf4f60c0ce2806 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 3 Aug 2011 15:27:43 -0700 Subject: [PATCH 416/600] ir_to_mesa: Implement ir_unop_any using DP4 w/saturate or DP4 w/SLT This is just like the ir_binop_logic_or case. The operation ir_unop_any is (a.x || a.y || a.z || a.w). Logical-or is implemented using addition (followed by clamping to [0,1]) on values of 0.0 and 1.0. Replacing the logical-or operators with addition gives (a.x + a.y + a.z + a.w).
This can be implemented using a dot-product with a vector of all 1.0. Previously a SNE instruction was used to clamp the resulting logic value to [0,1]. In a fragment shader, using a saturate on the dot-product has the same effect. Adding the saturate to the dot-product is free, so (at least) one instruction is saved. In a vertex shader, using an SLT on the negation of the dot-product result has the same effect. Many older shader architectures do not support the SNE instruction. It must be emulated using two SLT instructions and an ADD. On these architectures, the single SLT saves two instructions. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 60d498bd9e3..1bd9a2eee1b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1256,12 +1256,31 @@ ir_to_mesa_visitor::visit(ir_expression *ir) } break; - case ir_unop_any: + case ir_unop_any: { assert(ir->operands[0]->type->is_vector()); - emit_dp(ir, result_dst, op[0], op[0], - ir->operands[0]->type->vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. 
+ */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } break; + } case ir_binop_logic_xor: emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); From e7bf096e8b04931996c8c56548ce0b2c0af3a0dc Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 3 Aug 2011 15:35:01 -0700 Subject: [PATCH 417/600] ir_to_mesa: Implement ir_binop_any_nequal using DP4 w/saturate or DP4 w/SLT The operation ir_binop_any_nequal is (a.x != b.x) || (a.y != b.y) || (a.z != b.z) || (a.w != b.w), and that is the same as any(bvec4(a.x != b.x, a.y != b.y, a.z != b.z, a.w != b.w)). Implement the any() part the same way the regular ir_unop_any is implemented. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1bd9a2eee1b..1c674ea8756 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1249,8 +1249,26 @@ ir_to_mesa_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { src_reg temp = get_temp(glsl_type::vec4_type); emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]); - emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *const dp = + emit_dp(ir, result_dst, temp, temp, vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * achieved using SLT. 
+ */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } } else { emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); } From ba01df11c4d09c65514a8522cb319e29034ab5a8 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 3 Aug 2011 15:42:05 -0700 Subject: [PATCH 418/600] ir_to_mesa: Implement ir_binop_all_equal using DP4 w/SGE The operation ir_binop_all_equal is !(a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w). Logical-or is implemented using addition (followed by clampling to [0,1]) on values of 0.0 and 1.0. Replacing the logical-or operators with addition gives !bool((int(a.x != b.x) + int(a.y == b.y) + int(a.z == b.z) + int(a.w == b.w)). This can be implemented using a dot-product with a vector of all 1.0. After the dot-product, the value will be an integer on the range [0,4]. Previously a SEQ instruction was used to clamp the resulting logic value to [0,1] and invert the result. Using an SGE instruction on the negation of the dot-product result has the same effect. Many older shader architectures do not support the SEQ instruction. It must be emulated using two SGE instructions and a MUL. On these architectures, the single SGE saves two instructions. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1c674ea8756..4c8b097de6b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1237,8 +1237,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { src_reg temp = get_temp(glsl_type::vec4_type); emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero becomes 1.0, and positive values become zero. 
+ */ emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0)); + + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero becomes 1.0, and negative values become zero. This + * achieved using SGE. + */ + src_reg sge_src = result_src; + sge_src.negate = ~sge_src.negate; + emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0)); } else { emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); } From ff2cfb8989cd79218dfe2cd8c3de20f1ca7418e6 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 2 Aug 2011 12:17:20 -0700 Subject: [PATCH 419/600] ir_to_mesa: Emit a MAD(b, -a, b) for !a && b !a && b occurs frequently when nexted if-statements have been flattened. It should also be possible use a MAD for (a && b) || c, though that would require a MAD_SAT. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 4c8b097de6b..b1211c1145c 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -314,6 +314,8 @@ public: GLboolean try_emit_mad(ir_expression *ir, int mul_operand); + bool try_emit_mad_for_and_not(ir_expression *ir, + int mul_operand); GLboolean try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -892,6 +894,46 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) return true; } +/** + * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) + * + * The logic values are 1.0 for true and 0.0 for false. Logical-and is + * implemented using multiplication, and logical-or is implemented using + * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 
+ * As result, the logical expression (a & !b) can be rewritten as: + * + * - a * !b + * - a * (1 - b) + * - (a * 1) - (a * b) + * - a + -(a * b) + * - a + (a * -b) + * + * This final expression can be implemented as a single MAD(a, -b, a) + * instruction. + */ +bool +ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) +{ + const int other_operand = 1 - try_operand; + src_reg a, b; + + ir_expression *expr = ir->operands[try_operand]->as_expression(); + if (!expr || expr->operation != ir_unop_logic_not) + return false; + + ir->operands[other_operand]->accept(this); + a = this->result; + expr->operands[0]->accept(this); + b = this->result; + + b.negate = ~b.negate; + + this->result = get_temp(ir->type); + emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); + + return true; +} + GLboolean ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) { @@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir) if (try_emit_mad(ir, 0)) return; } + + /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) + */ + if (ir->operation == ir_binop_logic_and) { + if (try_emit_mad_for_and_not(ir, 1)) + return; + if (try_emit_mad_for_and_not(ir, 0)) + return; + } + if (try_emit_sat(ir)) return; From 54c48a95e6e0573886433f94ac83293876ffe03d Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 10 Feb 2011 15:48:27 -0800 Subject: [PATCH 420/600] mesa: Add partial constant propagation pass for Mesa IR This cleans up some code generated by the IR-to-Mesa pass for i915. In particular, some shaders involving arrays of constant matrices result in really bad code. v2: Silence several warnings from merging the gl_constant_value work. Fix DP[23] folding. Add support for a bunch more opcodes that appear in piglit runs on i915. 
Reviewed-by: Eric Anholt --- src/mesa/SConscript | 1 + src/mesa/program/prog_opt_constant_fold.c | 451 ++++++++++++++++++++++ src/mesa/program/prog_optimize.c | 2 + src/mesa/program/prog_optimize.h | 3 + src/mesa/sources.mak | 1 + 5 files changed, 458 insertions(+) create mode 100644 src/mesa/program/prog_opt_constant_fold.c diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 05aa0e8010e..b0c3334fa48 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -293,6 +293,7 @@ program_sources = [ 'program/prog_instruction.c', 'program/prog_noise.c', 'program/prog_optimize.c', + 'program/prog_opt_constant_fold.c', 'program/prog_parameter.c', 'program/prog_parameter_layout.c', 'program/prog_print.c', diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c new file mode 100644 index 00000000000..e2418b55451 --- /dev/null +++ b/src/mesa/program/prog_opt_constant_fold.c @@ -0,0 +1,451 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "program.h" +#include "prog_instruction.h" +#include "prog_optimize.h" +#include "prog_parameter.h" +#include + +static bool +src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs) +{ + unsigned i; + + for (i = 0; i < num_srcs; i++) { + if (inst->SrcReg[i].File != PROGRAM_CONSTANT) + return false; + } + + return true; +} + +static struct prog_src_register +src_reg_for_float(struct gl_program *prog, float val) +{ + struct prog_src_register src; + unsigned swiz; + + memset(&src, 0, sizeof(src)); + + src.File = PROGRAM_CONSTANT; + src.Index = _mesa_add_unnamed_constant(prog->Parameters, + (gl_constant_value *) &val, 1, &swiz); + src.Swizzle = swiz; + return src; +} + +static struct prog_src_register +src_reg_for_vec4(struct gl_program *prog, const float *val) +{ + struct prog_src_register src; + unsigned swiz; + + memset(&src, 0, sizeof(src)); + + src.File = PROGRAM_CONSTANT; + src.Index = _mesa_add_unnamed_constant(prog->Parameters, + (gl_constant_value *) val, 4, &swiz); + src.Swizzle = swiz; + return src; +} + +static bool +src_regs_are_same(const struct prog_src_register *a, + const struct prog_src_register *b) +{ + return (a->File == b->File) + && (a->Index == b->Index) + && (a->Swizzle == b->Swizzle) + && (a->Abs == b->Abs) + && (a->Negate == b->Negate) + && (a->RelAddr == 0) + && (b->RelAddr == 0); +} + +static void +get_value(struct gl_program *prog, struct prog_src_register *r, float *data) +{ + const gl_constant_value *const value = + prog->Parameters->ParameterValues[r->Index]; + + data[0] = value[GET_SWZ(r->Swizzle, 0)].f; + data[1] = value[GET_SWZ(r->Swizzle, 1)].f; 
+ data[2] = value[GET_SWZ(r->Swizzle, 2)].f; + data[3] = value[GET_SWZ(r->Swizzle, 3)].f; + + if (r->Abs) { + data[0] = fabsf(data[0]); + data[1] = fabsf(data[1]); + data[2] = fabsf(data[2]); + data[3] = fabsf(data[3]); + } + + if (r->Negate & 0x01) { + data[0] = -data[0]; + } + + if (r->Negate & 0x02) { + data[1] = -data[1]; + } + + if (r->Negate & 0x04) { + data[2] = -data[2]; + } + + if (r->Negate & 0x08) { + data[3] = -data[3]; + } +} + +/** + * Try to replace instructions that produce a constant result with simple moves + * + * The hope is that a following copy propagation pass will eliminate the + * unnecessary move instructions. + */ +GLboolean +_mesa_constant_fold(struct gl_program *prog) +{ + bool progress = false; + unsigned i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *const inst = &prog->Instructions[i]; + + switch (inst->Opcode) { + case OPCODE_ADD: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = a[0] + b[0]; + result[1] = a[1] + b[1]; + result[2] = a[2] + b[2]; + result[3] = a[3] + b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_CMP: + /* FINISHME: We could also optimize CMP instructions where the first + * FINISHME: source is a constant that is either all < 0.0 or all + * FINISHME: >= 0.0. + */ + if (src_regs_are_constant(inst, 3)) { + float a[4]; + float b[4]; + float c[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + get_value(prog, &inst->SrcReg[2], c); + + result[0] = a[0] < 0.0f ? b[0] : c[0]; + result[1] = a[1] < 0.0f ? b[1] : c[1]; + result[2] = a[2] < 0.0f ? b[2] : c[2]; + result[3] = a[3] < 0.0f ? 
b[3] : c[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + /* It seems like a loop could be used here, but we cleverly put + * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from + * the opcode results in various failures of the loop control. + */ + result = (a[0] * b[0]) + (a[1] * b[1]); + + if (inst->Opcode >= OPCODE_DP3) + result += a[2] * b[2]; + + if (inst->Opcode == OPCODE_DP4) + result += a[3] * b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_MUL: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = a[0] * b[0]; + result[1] = a[1] * b[1]; + result[2] = a[2] * b[2]; + result[3] = a[3] * b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SEQ: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] == b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] == b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] == b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] == b[3]) ? 
1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SGE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SGT: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] > b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] > b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] > b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] > b[3]) ? 
1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SLE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SLT: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] < b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] < b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] < b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] < b[3]) ? 
1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SNE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] != b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] != b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] != b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] != b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + default: + break; + } + } + + return progress; +} diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 3340ce0498b..25d9684b137 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -1358,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program) any_change = GL_TRUE; if (_mesa_remove_dead_code_local(program)) any_change = GL_TRUE; + + any_change = _mesa_constant_fold(program) || any_change; _mesa_reallocate_registers(program); } while (any_change); } diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h index 463f5fc51c4..9854fb7a491 100644 --- 
a/src/mesa/program/prog_optimize.h +++ b/src/mesa/program/prog_optimize.h @@ -44,4 +44,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions, extern void _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program); +extern GLboolean +_mesa_constant_fold(struct gl_program *prog); + #endif diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak index ed008f8813e..5e77e0f5919 100644 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@ -251,6 +251,7 @@ PROGRAM_SOURCES = \ program/prog_instruction.c \ program/prog_noise.c \ program/prog_optimize.c \ + program/prog_opt_constant_fold.c \ program/prog_parameter.c \ program/prog_parameter_layout.c \ program/prog_print.c \ From 7125f1e87df359be4aad1d801b633146eeac7292 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 3 Aug 2011 17:12:29 -0700 Subject: [PATCH 421/600] mesa: Bump instruction execution limit to 65536 Shader Model 3.0[1] requires that shaders be able to execute at least 65536 instructions. Bump Mesa maxExec to that limit. This allows several vertex shaders in the OpenGL ES 2.0 conformance test suite to run to completion. 
1: http://en.wikipedia.org/wiki/High_Level_Shader_Language Reviewed-by: Eric Anholt --- src/mesa/program/prog_execute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index dbfd1b91875..c70a1e344e5 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -639,7 +639,7 @@ _mesa_execute_program(struct gl_context * ctx, struct gl_program_machine *machine) { const GLuint numInst = program->NumInstructions; - const GLuint maxExec = 10000; + const GLuint maxExec = 65536; GLuint pc, numExec = 0; machine->CurProgram = program; From b629d5ba24f76ed6af35455a874d351fde1e5bbe Mon Sep 17 00:00:00 2001 From: Lauri Kasanen Date: Fri, 1 Jul 2011 13:49:18 +0300 Subject: [PATCH 422/600] xmlconfig: Make the error message more informative --- src/mesa/drivers/dri/common/xmlconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index 77967ac2a43..12dd31bb162 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -567,7 +567,7 @@ static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) { } else defaultVal = attrVal[OA_DEFAULT]; if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal)) - XML_FATAL ("illegal default value: %s.", defaultVal); + XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal); if (attrVal[OA_VALID]) { if (cache->info[opt].type == DRI_BOOL) From 59e56957cce16e5d993974e4b7f339afc9cb949b Mon Sep 17 00:00:00 2001 From: Lauri Kasanen Date: Fri, 1 Jul 2011 13:01:00 +0300 Subject: [PATCH 423/600] xmlpool.h: fix a typo --- src/mesa/drivers/dri/common/xmlpool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h index 587517ea10a..ffea430024d 100644 --- 
a/src/mesa/drivers/dri/common/xmlpool.h +++ b/src/mesa/drivers/dri/common/xmlpool.h @@ -60,7 +60,7 @@ #define DRI_CONF_OPT_BEGIN(name,type,def) \ "
  • Mesa Extensions
  • Function Name Mangling
  • Gallium llvmpipe driver +
  • Gallium post-processing Developer Topics diff --git a/docs/postprocess.html b/docs/postprocess.html new file mode 100644 index 00000000000..2a379694298 --- /dev/null +++ b/docs/postprocess.html @@ -0,0 +1,56 @@ + + +Gallium Post-processing + + + + + +

    Gallium Post-processing

    + +

    +The Gallium drivers support user-defined image post-processing. +At the end of drawing a frame a post-processing filter can be applied to +the rendered image. +Example filters include morphological antialiasing and cel shading. +

    + +

    +The filters can be toggled per-app via driconf, or per-session via the +corresponding environment variables. +

    + +

    +Multiple filters can be used together. +

    + + +

    PP environment variables

    + +
      +
    • PP_DEBUG - If defined, debug information will be printed to stderr. +
    + +

    Current filters

    + +
      +
    • pp_nored, pp_nogreen, pp_noblue - set to 1 to remove the corresponding color channel. +These are basic filters for easy testing of the PP queue. +
    • pp_jimenezmlaa, pp_jimenezmlaa_color - +Jimenez's MLAA +is a morphological antialiasing filter. +The two versions use depth and color data, respectively. +Which works better depends on the app - depth will not blur text, but it will +miss transparent textures for example. +Set to a number from 2 to 32, roughly corresponding to quality. +Numbers higher than 8 see diminishing returns. +
    • pp_celshade - set to 1 to enable cel shading (a more complex color filter). +
    + + +
    +
    + + + + From e3b0e3776646d0367206e4544229622eb22fe9f8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 26 Aug 2011 14:16:20 -0600 Subject: [PATCH 600/600] g3dvl: use pointer_to_uintptr() to silence a cast warning --- src/gallium/auxiliary/vl/vl_vlc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h index 17a7b650c09..4db1334d6a4 100644 --- a/src/gallium/auxiliary/vl/vl_vlc.h +++ b/src/gallium/auxiliary/vl/vl_vlc.h @@ -33,6 +33,7 @@ #include #include +#include "util/u_pointer.h" struct vl_vlc { @@ -98,7 +99,7 @@ vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len) vlc->valid_bits = 0; /* align the data pointer */ - while((uint64_t)data & 3) { + while (pointer_to_uintptr(data) & 3) { vlc->buffer |= (uint64_t)*data << (56 - vlc->valid_bits); ++data; --len;