From f67de2ed468ecbd09085746130a27bd2c2da911d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Mon, 4 Jul 2011 12:31:30 +0100 Subject: [PATCH 001/113] draw: Fix fetch_max calculation. It should be max_index = start + count - 1 instead of max_index = count - 1 --- src/gallium/auxiliary/draw/draw_llvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index f33c9078c9c..8bb87440497 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1235,7 +1235,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) draw_llvm_variant_key_samplers(&variant->key), context_ptr); - fetch_max = LLVMBuildSub(builder, count, + /* fetch_max = start + count - 1 */ + fetch_max = LLVMBuildSub(builder, end, lp_build_const_int32(gallivm, 1), "fetch_max"); From 865f9272189697932d96a318f60efb99344ab944 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Tue, 5 Jul 2011 01:58:46 +0200 Subject: [PATCH 002/113] r600g: Get rid of the unused "family" parameter to r600_is_vertex_format_supported(). 
Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/r600_pipe.c | 10 +++------- src/gallium/drivers/r600/r600_state_inlines.h | 3 +-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 8f3a175587e..ac41449a301 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -547,13 +547,9 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, retval |= PIPE_BIND_DEPTH_STENCIL; } - if (usage & PIPE_BIND_VERTEX_BUFFER) { - struct r600_screen *rscreen = (struct r600_screen *)screen; - enum radeon_family family = r600_get_family(rscreen->radeon); - - if (r600_is_vertex_format_supported(format, family)) { - retval |= PIPE_BIND_VERTEX_BUFFER; - } + if ((usage & PIPE_BIND_VERTEX_BUFFER) && + r600_is_vertex_format_supported(format)) { + retval |= PIPE_BIND_VERTEX_BUFFER; } if (usage & PIPE_BIND_TRANSFER_READ) diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index eb6fb6594b2..71852543e56 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -576,8 +576,7 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) return r600_translate_dbformat(format) != ~0; } -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format, - enum radeon_family family) +static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) { unsigned i; const struct util_format_description *desc = util_format_description(format); From 18cdb9c8abfb5d9f009fcb36ab788f48792207e4 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Tue, 5 Jul 2011 01:58:46 +0200 Subject: [PATCH 003/113] r600g: Use the actual Evergreen functions to query format support on Evergreen. 
Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/evergreen_state.c | 55 +++++++++++++++++ src/gallium/drivers/r600/r600_formats.h | 32 ++++++++++ src/gallium/drivers/r600/r600_pipe.c | 61 ++----------------- src/gallium/drivers/r600/r600_pipe.h | 10 +++ src/gallium/drivers/r600/r600_state.c | 55 +++++++++++++++++ src/gallium/drivers/r600/r600_state_inlines.h | 32 ---------- 6 files changed, 157 insertions(+), 88 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index dc182611482..18d54cc1e26 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -48,6 +48,61 @@ #include "r600_pipe.h" #include "eg_state_inlines.h" +boolean evergreen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + unsigned retval = 0; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + R600_ERR("r600: unsupported texture type %d\n", target); + return FALSE; + } + + if (!util_format_is_supported(format, usage)) + return FALSE; + + /* Multisample */ + if (sample_count > 1) + return FALSE; + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + r600_is_sampler_format_supported(screen, format)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + r600_is_colorbuffer_format_supported(format)) { + retval |= usage & + (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED); + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + r600_is_zs_format_supported(format)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_VERTEX_BUFFER) && + r600_is_vertex_format_supported(format)) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + + if (usage & PIPE_BIND_TRANSFER_READ) + retval |= PIPE_BIND_TRANSFER_READ; + if (usage & 
PIPE_BIND_TRANSFER_WRITE) + retval |= PIPE_BIND_TRANSFER_WRITE; + + return retval == usage; +} + static void evergreen_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) { diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h index ae0bc432ad2..1c1089d89d2 100644 --- a/src/gallium/drivers/r600/r600_formats.h +++ b/src/gallium/drivers/r600/r600_formats.h @@ -81,4 +81,36 @@ static INLINE unsigned r600_endian_swap(unsigned size) } } +static INLINE bool r600_is_vertex_format_supported(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + + if (!desc) + return false; + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + } + if (i == 4) + return false; + + /* No fixed, no double. */ + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || + (desc->channel[i].size == 64 && + desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) + return false; + + /* No scaled/norm formats with 32 bits per channel. 
*/ + if (desc->channel[i].size == 32 && + (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)) + return false; + + return true; +} + #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index ac41449a301..d512268f63f 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -45,7 +45,6 @@ #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" -#include "r600_state_inlines.h" /* * pipe_context @@ -506,60 +505,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e } } -static boolean r600_is_format_supported(struct pipe_screen* screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned usage) -{ - unsigned retval = 0; - if (target >= PIPE_MAX_TEXTURE_TYPES) { - R600_ERR("r600: unsupported texture type %d\n", target); - return FALSE; - } - - if (!util_format_is_supported(format, usage)) - return FALSE; - - /* Multisample */ - if (sample_count > 1) - return FALSE; - - if ((usage & PIPE_BIND_SAMPLER_VIEW) && - r600_is_sampler_format_supported(screen, format)) { - retval |= PIPE_BIND_SAMPLER_VIEW; - } - - if ((usage & (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) && - r600_is_colorbuffer_format_supported(format)) { - retval |= usage & - (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED); - } - - if ((usage & PIPE_BIND_DEPTH_STENCIL) && - r600_is_zs_format_supported(format)) { - retval |= PIPE_BIND_DEPTH_STENCIL; - } - - if ((usage & PIPE_BIND_VERTEX_BUFFER) && - r600_is_vertex_format_supported(format)) { - retval |= PIPE_BIND_VERTEX_BUFFER; - } - - if (usage & PIPE_BIND_TRANSFER_READ) - retval |= PIPE_BIND_TRANSFER_READ; - if (usage & PIPE_BIND_TRANSFER_WRITE) - retval |= PIPE_BIND_TRANSFER_WRITE; - - return retval == usage; -} - static void 
r600_destroy_screen(struct pipe_screen* pscreen) { struct r600_screen *rscreen = (struct r600_screen *)pscreen; @@ -648,7 +593,11 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) rscreen->screen.get_param = r600_get_param; rscreen->screen.get_shader_param = r600_get_shader_param; rscreen->screen.get_paramf = r600_get_paramf; - rscreen->screen.is_format_supported = r600_is_format_supported; + if (r600_get_family_class(radeon) >= EVERGREEN) { + rscreen->screen.is_format_supported = evergreen_is_format_supported; + } else { + rscreen->screen.is_format_supported = r600_is_format_supported; + } rscreen->screen.context_create = r600_create_context; rscreen->screen.fence_reference = r600_fence_reference; rscreen->screen.fence_signalled = r600_fence_signalled; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 2667c80bcef..c58c2f77743 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -247,6 +247,11 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); +boolean evergreen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage); /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); @@ -290,6 +295,11 @@ void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); void r600_adjust_gprs(struct r600_pipe_context *rctx); +boolean r600_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage); /* r600_texture.c */ void r600_init_screen_texture_functions(struct pipe_screen *screen); diff --git 
a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index be07f5feff8..e329317b3ab 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -46,6 +46,61 @@ #include "r600_pipe.h" #include "r600_state_inlines.h" +boolean r600_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + unsigned retval = 0; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + R600_ERR("r600: unsupported texture type %d\n", target); + return FALSE; + } + + if (!util_format_is_supported(format, usage)) + return FALSE; + + /* Multisample */ + if (sample_count > 1) + return FALSE; + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + r600_is_sampler_format_supported(screen, format)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + r600_is_colorbuffer_format_supported(format)) { + retval |= usage & + (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED); + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + r600_is_zs_format_supported(format)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_VERTEX_BUFFER) && + r600_is_vertex_format_supported(format)) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + + if (usage & PIPE_BIND_TRANSFER_READ) + retval |= PIPE_BIND_TRANSFER_READ; + if (usage & PIPE_BIND_TRANSFER_WRITE) + retval |= PIPE_BIND_TRANSFER_WRITE; + + return retval == usage; +} + void r600_polygon_offset_update(struct r600_pipe_context *rctx) { struct r600_pipe_state state; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 71852543e56..5615dc6dda3 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -576,36 +576,4 @@ static INLINE boolean 
r600_is_zs_format_supported(enum pipe_format format) return r600_translate_dbformat(format) != ~0; } -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) -{ - unsigned i; - const struct util_format_description *desc = util_format_description(format); - if (!desc) - return FALSE; - - /* Find the first non-VOID channel. */ - for (i = 0; i < 4; i++) { - if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { - break; - } - } - if (i == 4) - return FALSE; - - /* No fixed, no double. */ - if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || - desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || - (desc->channel[i].size == 64 && - desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) - return FALSE; - - /* No scaled/norm formats with 32 bits per channel. */ - if (desc->channel[i].size == 32 && - (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED || - desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)) - return FALSE; - - return TRUE; -} - #endif From 4015b5877c9b92433235eb65da3a9b0aa37e5b98 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Tue, 5 Jul 2011 01:58:46 +0200 Subject: [PATCH 004/113] r600g: Get rid of some unused functions. 
Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/eg_state_inlines.h | 32 ------------------- src/gallium/drivers/r600/r600_state_inlines.h | 32 ------------------- 2 files changed, 64 deletions(-) diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index b5590116e8f..d9f88de7ce9 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -215,38 +215,6 @@ static inline unsigned r600_tex_compare(unsigned compare) } } -static inline unsigned r600_tex_swizzle(unsigned swizzle) -{ - switch (swizzle) { - case PIPE_SWIZZLE_RED: - return V_030010_SQ_SEL_X; - case PIPE_SWIZZLE_GREEN: - return V_030010_SQ_SEL_Y; - case PIPE_SWIZZLE_BLUE: - return V_030010_SQ_SEL_Z; - case PIPE_SWIZZLE_ALPHA: - return V_030010_SQ_SEL_W; - case PIPE_SWIZZLE_ZERO: - return V_030010_SQ_SEL_0; - default: - case PIPE_SWIZZLE_ONE: - return V_030010_SQ_SEL_1; - } -} - -static inline unsigned r600_format_type(unsigned format_type) -{ - switch (format_type) { - default: - case UTIL_FORMAT_TYPE_UNSIGNED: - return V_030010_SQ_FORMAT_COMP_UNSIGNED; - case UTIL_FORMAT_TYPE_SIGNED: - return V_030010_SQ_FORMAT_COMP_SIGNED; - case UTIL_FORMAT_TYPE_FIXED: - return V_030010_SQ_FORMAT_COMP_UNSIGNED_BIASED; - } -} - static inline unsigned r600_tex_dim(unsigned dim) { switch (dim) { diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 5615dc6dda3..e9479bd846c 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -215,38 +215,6 @@ static inline unsigned r600_tex_compare(unsigned compare) } } -static inline unsigned r600_tex_swizzle(unsigned swizzle) -{ - switch (swizzle) { - case PIPE_SWIZZLE_RED: - return V_038010_SQ_SEL_X; - case PIPE_SWIZZLE_GREEN: - return V_038010_SQ_SEL_Y; - case PIPE_SWIZZLE_BLUE: - return V_038010_SQ_SEL_Z; - case PIPE_SWIZZLE_ALPHA: - return V_038010_SQ_SEL_W; - 
case PIPE_SWIZZLE_ZERO: - return V_038010_SQ_SEL_0; - default: - case PIPE_SWIZZLE_ONE: - return V_038010_SQ_SEL_1; - } -} - -static inline unsigned r600_format_type(unsigned format_type) -{ - switch (format_type) { - default: - case UTIL_FORMAT_TYPE_UNSIGNED: - return V_038010_SQ_FORMAT_COMP_UNSIGNED; - case UTIL_FORMAT_TYPE_SIGNED: - return V_038010_SQ_FORMAT_COMP_SIGNED; - case UTIL_FORMAT_TYPE_FIXED: - return V_038010_SQ_FORMAT_COMP_UNSIGNED_BIASED; - } -} - static inline unsigned r600_tex_dim(unsigned dim) { switch (dim) { From 88cf65a600c34ff361bcb4a5d928be94dbc1d91e Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Tue, 5 Jul 2011 01:58:46 +0200 Subject: [PATCH 005/113] r600g: Get rid of an unused include in r600_texture.c. Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/r600_texture.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 8e75d847dc5..18460419f85 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -35,7 +35,6 @@ #include "pipebuffer/pb_buffer.h" #include "r600_pipe.h" #include "r600_resource.h" -#include "r600_state_inlines.h" #include "r600d.h" #include "r600_formats.h" From 3fccc14b2fb35aef95bc04dee46e280a48679299 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Tue, 5 Jul 2011 01:58:47 +0200 Subject: [PATCH 006/113] r600g: Get rid of the state_inlines headers. 
Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/eg_state_inlines.h | 555 ------------------ src/gallium/drivers/r600/evergreen_state.c | 527 ++++++++++++++++- src/gallium/drivers/r600/r600_state.c | 519 +++++++++++++++- src/gallium/drivers/r600/r600_state_inlines.h | 547 ----------------- 4 files changed, 1044 insertions(+), 1104 deletions(-) delete mode 100644 src/gallium/drivers/r600/eg_state_inlines.h delete mode 100644 src/gallium/drivers/r600/r600_state_inlines.h diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h deleted file mode 100644 index d9f88de7ce9..00000000000 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ /dev/null @@ -1,555 +0,0 @@ -/* - * Copyright 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#ifndef EG_STATE_INLINES_H -#define EG_STATE_INLINES_H - -#include "util/u_format.h" -#include "evergreend.h" -#include "r600_formats.h" - -static INLINE uint32_t r600_translate_blend_function(int blend_func) -{ - switch (blend_func) { - case PIPE_BLEND_ADD: - return V_028780_COMB_DST_PLUS_SRC; - case PIPE_BLEND_SUBTRACT: - return V_028780_COMB_SRC_MINUS_DST; - case PIPE_BLEND_REVERSE_SUBTRACT: - return V_028780_COMB_DST_MINUS_SRC; - case PIPE_BLEND_MIN: - return V_028780_COMB_MIN_DST_SRC; - case PIPE_BLEND_MAX: - return V_028780_COMB_MAX_DST_SRC; - default: - R600_ERR("Unknown blend function %d\n", blend_func); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_blend_factor(int blend_fact) -{ - switch (blend_fact) { - case PIPE_BLENDFACTOR_ONE: - return V_028780_BLEND_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: - return V_028780_BLEND_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return V_028780_BLEND_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return V_028780_BLEND_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: - return V_028780_BLEND_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return V_028780_BLEND_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return V_028780_BLEND_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return V_028780_BLEND_CONST_ALPHA; - case PIPE_BLENDFACTOR_ZERO: - return V_028780_BLEND_ZERO; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return V_028780_BLEND_ONE_MINUS_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return V_028780_BLEND_ONE_MINUS_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return V_028780_BLEND_ONE_MINUS_DST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return V_028780_BLEND_ONE_MINUS_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return V_028780_BLEND_ONE_MINUS_CONST_ALPHA; - case PIPE_BLENDFACTOR_SRC1_COLOR: - return V_028780_BLEND_SRC1_COLOR; - case 
PIPE_BLENDFACTOR_SRC1_ALPHA: - return V_028780_BLEND_SRC1_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - return V_028780_BLEND_INV_SRC1_COLOR; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return V_028780_BLEND_INV_SRC1_ALPHA; - default: - R600_ERR("Bad blend factor %d not supported!\n", blend_fact); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_stencil_op(int s_op) -{ - switch (s_op) { - case PIPE_STENCIL_OP_KEEP: - return V_028800_STENCIL_KEEP; - case PIPE_STENCIL_OP_ZERO: - return V_028800_STENCIL_ZERO; - case PIPE_STENCIL_OP_REPLACE: - return V_028800_STENCIL_REPLACE; - case PIPE_STENCIL_OP_INCR: - return V_028800_STENCIL_INCR; - case PIPE_STENCIL_OP_DECR: - return V_028800_STENCIL_DECR; - case PIPE_STENCIL_OP_INCR_WRAP: - return V_028800_STENCIL_INCR_WRAP; - case PIPE_STENCIL_OP_DECR_WRAP: - return V_028800_STENCIL_DECR_WRAP; - case PIPE_STENCIL_OP_INVERT: - return V_028800_STENCIL_INVERT; - default: - R600_ERR("Unknown stencil op %d", s_op); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_fill(uint32_t func) -{ - switch(func) { - case PIPE_POLYGON_MODE_FILL: - return 2; - case PIPE_POLYGON_MODE_LINE: - return 1; - case PIPE_POLYGON_MODE_POINT: - return 0; - default: - assert(0); - return 0; - } -} - -/* translates straight */ -static INLINE uint32_t r600_translate_ds_func(int func) -{ - return func; -} - -static inline unsigned r600_tex_wrap(unsigned wrap) -{ - switch (wrap) { - default: - case PIPE_TEX_WRAP_REPEAT: - return V_03C000_SQ_TEX_WRAP; - case PIPE_TEX_WRAP_CLAMP: - return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return V_03C000_SQ_TEX_CLAMP_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - return V_03C000_SQ_TEX_MIRROR; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - return 
V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; - } -} - -static inline unsigned r600_tex_filter(unsigned filter) -{ - switch (filter) { - default: - case PIPE_TEX_FILTER_NEAREST: - return V_03C000_SQ_TEX_XY_FILTER_POINT; - case PIPE_TEX_FILTER_LINEAR: - return V_03C000_SQ_TEX_XY_FILTER_BILINEAR; - } -} - -static inline unsigned r600_tex_mipfilter(unsigned filter) -{ - switch (filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - return V_03C000_SQ_TEX_Z_FILTER_POINT; - case PIPE_TEX_MIPFILTER_LINEAR: - return V_03C000_SQ_TEX_Z_FILTER_LINEAR; - default: - case PIPE_TEX_MIPFILTER_NONE: - return V_03C000_SQ_TEX_Z_FILTER_NONE; - } -} - -static inline unsigned r600_tex_compare(unsigned compare) -{ - switch (compare) { - default: - case PIPE_FUNC_NEVER: - return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER; - case PIPE_FUNC_LESS: - return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS; - case PIPE_FUNC_EQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL; - case PIPE_FUNC_LEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; - case PIPE_FUNC_GREATER: - return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER; - case PIPE_FUNC_NOTEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; - case PIPE_FUNC_GEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; - case PIPE_FUNC_ALWAYS: - return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS; - } -} - -static inline unsigned r600_tex_dim(unsigned dim) -{ - switch (dim) { - default: - case PIPE_TEXTURE_1D: - return V_030000_SQ_TEX_DIM_1D; - case PIPE_TEXTURE_1D_ARRAY: - return V_030000_SQ_TEX_DIM_1D_ARRAY; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - return V_030000_SQ_TEX_DIM_2D; - case PIPE_TEXTURE_2D_ARRAY: - return V_030000_SQ_TEX_DIM_2D_ARRAY; - case PIPE_TEXTURE_3D: - return V_030000_SQ_TEX_DIM_3D; - case PIPE_TEXTURE_CUBE: - return V_030000_SQ_TEX_DIM_CUBEMAP; - } -} - -static inline uint32_t r600_translate_dbformat(enum pipe_format format) -{ - switch (format) { - case 
PIPE_FORMAT_Z16_UNORM: - return V_028040_Z_16; - case PIPE_FORMAT_Z24X8_UNORM: - return V_028040_Z_24; - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_028040_Z_24; - default: - return ~0; - } -} - -static inline uint32_t r600_translate_stencilformat(enum pipe_format format) -{ - if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) - return 1; - else - return 0; -} - -static inline uint32_t r600_translate_colorswap(enum pipe_format format) -{ - switch (format) { - /* 8-bit buffers. */ - case PIPE_FORMAT_L4A4_UNORM: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_A8_UNORM: - return V_028C70_SWAP_ALT_REV; - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_L8_SRGB: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: - return V_028C70_SWAP_STD; - - /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: - return V_028C70_SWAP_STD_REV; - - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_Z16_UNORM: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_L8A8_SRGB: - return V_028C70_SWAP_ALT; - case PIPE_FORMAT_R8G8_UNORM: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_R16_UNORM: - case PIPE_FORMAT_R16_FLOAT: - return V_028C70_SWAP_STD; - - /* 32-bit buffers. 
*/ - case PIPE_FORMAT_A8B8G8R8_SRGB: - return V_028C70_SWAP_STD_REV; - case PIPE_FORMAT_B8G8R8A8_SRGB: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - return V_028C70_SWAP_ALT_REV; - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - /* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */ - return V_028C70_SWAP_STD_REV; - - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_SWAP_STD; - - case PIPE_FORMAT_B10G10R10A2_UNORM: - return V_028C70_SWAP_ALT; - - case PIPE_FORMAT_R11G11B10_FLOAT: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R16G16_UNORM: - return V_028C70_SWAP_STD; - - /* 64-bit buffers. */ - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - - /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_UNORM: - return V_028C70_SWAP_STD; - default: - R600_ERR("unsupported colorswap format %d\n", format); - return ~0; - } - return ~0; -} - -static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) -{ - switch (format) { - /* 8-bit buffers. 
*/ - case PIPE_FORMAT_L4A4_UNORM: - return V_028C70_COLOR_4_4; - - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_L8_SRGB: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: - return V_028C70_COLOR_8; - - /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: - return V_028C70_COLOR_5_6_5; - - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: - return V_028C70_COLOR_1_5_5_5; - - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - return V_028C70_COLOR_4_4_4_4; - - case PIPE_FORMAT_Z16_UNORM: - return V_028C70_COLOR_16; - - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_L8A8_SRGB: - case PIPE_FORMAT_R8G8_UNORM: - return V_028C70_COLOR_8_8; - - case PIPE_FORMAT_R16_UNORM: - return V_028C70_COLOR_16; - - case PIPE_FORMAT_R16_FLOAT: - return V_028C70_COLOR_16_FLOAT; - - /* 32-bit buffers. */ - case PIPE_FORMAT_A8B8G8R8_SRGB: - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8SG8SB8UX8U_NORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - return V_028C70_COLOR_8_8_8_8; - - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_COLOR_2_10_10_10; - - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_028C70_COLOR_8_24; - - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return V_028C70_COLOR_24_8; - - case PIPE_FORMAT_R32_FLOAT: - return V_028C70_COLOR_32_FLOAT; - - case PIPE_FORMAT_R16G16_FLOAT: - return V_028C70_COLOR_16_16_FLOAT; - - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16_UNORM: - 
return V_028C70_COLOR_16_16; - - case PIPE_FORMAT_R11G11B10_FLOAT: - return V_028C70_COLOR_10_11_11_FLOAT; - - /* 64-bit buffers. */ - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - return V_028C70_COLOR_16_16_16_16; - - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - return V_028C70_COLOR_16_16_16_16_FLOAT; - - case PIPE_FORMAT_R32G32_FLOAT: - return V_028C70_COLOR_32_32_FLOAT; - - case PIPE_FORMAT_R32G32_USCALED: - case PIPE_FORMAT_R32G32_SSCALED: - return V_028C70_COLOR_32_32; - - /* 96-bit buffers. */ - case PIPE_FORMAT_R32G32B32_FLOAT: - return V_028C70_COLOR_32_32_32_FLOAT; - - /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_UNORM: - return V_028C70_COLOR_32_32_32_32; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return V_028C70_COLOR_32_32_32_32_FLOAT; - - /* YUV buffers. */ - case PIPE_FORMAT_UYVY: - case PIPE_FORMAT_YUYV: - default: - return ~0; /* Unsupported. */ - } -} - -static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) -{ - if (R600_BIG_ENDIAN) { - switch(colorformat) { - case V_028C70_COLOR_4_4: - return(ENDIAN_NONE); - - /* 8-bit buffers. */ - case V_028C70_COLOR_8: - return(ENDIAN_NONE); - - /* 16-bit buffers. */ - case V_028C70_COLOR_5_6_5: - case V_028C70_COLOR_1_5_5_5: - case V_028C70_COLOR_4_4_4_4: - case V_028C70_COLOR_16: - case V_028C70_COLOR_8_8: - return(ENDIAN_8IN16); - - /* 32-bit buffers. */ - case V_028C70_COLOR_8_8_8_8: - case V_028C70_COLOR_2_10_10_10: - case V_028C70_COLOR_8_24: - case V_028C70_COLOR_24_8: - case V_028C70_COLOR_32_FLOAT: - case V_028C70_COLOR_16_16_FLOAT: - case V_028C70_COLOR_16_16: - return(ENDIAN_8IN32); - - /* 64-bit buffers. 
*/ - case V_028C70_COLOR_16_16_16_16: - case V_028C70_COLOR_16_16_16_16_FLOAT: - return(ENDIAN_8IN16); - - case V_028C70_COLOR_32_32_FLOAT: - case V_028C70_COLOR_32_32: - return(ENDIAN_8IN32); - - /* 96-bit buffers. */ - case V_028C70_COLOR_32_32_32_FLOAT: - /* 128-bit buffers. */ - case V_028C70_COLOR_32_32_32_32_FLOAT: - case V_028C70_COLOR_32_32_32_32: - return(ENDIAN_8IN32); - default: - return ENDIAN_NONE; /* Unsupported. */ - } - } else { - return ENDIAN_NONE; - } -} - -static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) -{ - return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; -} - -static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) -{ - return r600_translate_colorformat(format) != ~0 && - r600_translate_colorswap(format) != ~0; -} - -static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) -{ - return r600_translate_dbformat(format) != ~0; -} - -#endif diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 18d54cc1e26..887f52e67db 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -46,7 +46,532 @@ #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" -#include "eg_state_inlines.h" +#include "r600_formats.h" + +static uint32_t r600_translate_blend_function(int blend_func) +{ + switch (blend_func) { + case PIPE_BLEND_ADD: + return V_028780_COMB_DST_PLUS_SRC; + case PIPE_BLEND_SUBTRACT: + return V_028780_COMB_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return V_028780_COMB_DST_MINUS_SRC; + case PIPE_BLEND_MIN: + return V_028780_COMB_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return V_028780_COMB_MAX_DST_SRC; + default: + R600_ERR("Unknown blend function %d\n", blend_func); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_blend_factor(int blend_fact) +{ + switch (blend_fact) { + 
case PIPE_BLENDFACTOR_ONE: + return V_028780_BLEND_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V_028780_BLEND_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V_028780_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return V_028780_BLEND_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return V_028780_BLEND_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return V_028780_BLEND_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return V_028780_BLEND_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V_028780_BLEND_CONST_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return V_028780_BLEND_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V_028780_BLEND_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return V_028780_BLEND_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V_028780_BLEND_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V_028780_BLEND_ONE_MINUS_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V_028780_BLEND_ONE_MINUS_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: + return V_028780_BLEND_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return V_028780_BLEND_SRC1_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return V_028780_BLEND_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return V_028780_BLEND_INV_SRC1_ALPHA; + default: + R600_ERR("Bad blend factor %d not supported!\n", blend_fact); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_stencil_op(int s_op) +{ + switch (s_op) { + case PIPE_STENCIL_OP_KEEP: + return V_028800_STENCIL_KEEP; + case PIPE_STENCIL_OP_ZERO: + return V_028800_STENCIL_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return V_028800_STENCIL_REPLACE; + case PIPE_STENCIL_OP_INCR: + return V_028800_STENCIL_INCR; + case PIPE_STENCIL_OP_DECR: + return V_028800_STENCIL_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return 
V_028800_STENCIL_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return V_028800_STENCIL_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return V_028800_STENCIL_INVERT; + default: + R600_ERR("Unknown stencil op %d", s_op); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_fill(uint32_t func) +{ + switch(func) { + case PIPE_POLYGON_MODE_FILL: + return 2; + case PIPE_POLYGON_MODE_LINE: + return 1; + case PIPE_POLYGON_MODE_POINT: + return 0; + default: + assert(0); + return 0; + } +} + +/* translates straight */ +static uint32_t r600_translate_ds_func(int func) +{ + return func; +} + +static unsigned r600_tex_wrap(unsigned wrap) +{ + switch (wrap) { + default: + case PIPE_TEX_WRAP_REPEAT: + return V_03C000_SQ_TEX_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return V_03C000_SQ_TEX_MIRROR; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; + } +} + +static unsigned r600_tex_filter(unsigned filter) +{ + switch (filter) { + default: + case PIPE_TEX_FILTER_NEAREST: + return V_03C000_SQ_TEX_XY_FILTER_POINT; + case PIPE_TEX_FILTER_LINEAR: + return V_03C000_SQ_TEX_XY_FILTER_BILINEAR; + } +} + +static unsigned r600_tex_mipfilter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + return V_03C000_SQ_TEX_Z_FILTER_POINT; + case PIPE_TEX_MIPFILTER_LINEAR: + return V_03C000_SQ_TEX_Z_FILTER_LINEAR; + default: + case PIPE_TEX_MIPFILTER_NONE: + return V_03C000_SQ_TEX_Z_FILTER_NONE; + } +} + +static unsigned r600_tex_compare(unsigned compare) +{ + switch (compare) { + default: + case PIPE_FUNC_NEVER: + 
return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER; + case PIPE_FUNC_LESS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS; + case PIPE_FUNC_EQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL; + case PIPE_FUNC_LEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; + case PIPE_FUNC_GREATER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER; + case PIPE_FUNC_NOTEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; + case PIPE_FUNC_ALWAYS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS; + } +} + +static unsigned r600_tex_dim(unsigned dim) +{ + switch (dim) { + default: + case PIPE_TEXTURE_1D: + return V_030000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_030000_SQ_TEX_DIM_1D_ARRAY; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + return V_030000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_030000_SQ_TEX_DIM_2D_ARRAY; + case PIPE_TEXTURE_3D: + return V_030000_SQ_TEX_DIM_3D; + case PIPE_TEXTURE_CUBE: + return V_030000_SQ_TEX_DIM_CUBEMAP; + } +} + +static uint32_t r600_translate_dbformat(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return V_028040_Z_16; + case PIPE_FORMAT_Z24X8_UNORM: + return V_028040_Z_24; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_028040_Z_24; + default: + return ~0U; + } +} + +static uint32_t r600_translate_stencilformat(enum pipe_format format) +{ + if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + return 1; + else + return 0; +} + +static uint32_t r600_translate_colorswap(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_A8_UNORM: + return V_028C70_SWAP_ALT_REV; + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_028C70_SWAP_STD; + + /* 16-bit buffers. 
*/ + case PIPE_FORMAT_B5G6R5_UNORM: + return V_028C70_SWAP_STD_REV; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: + return V_028C70_SWAP_ALT; + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_FLOAT: + return V_028C70_SWAP_STD; + + /* 32-bit buffers. */ + case PIPE_FORMAT_A8B8G8R8_SRGB: + return V_028C70_SWAP_STD_REV; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + return V_028C70_SWAP_ALT_REV; + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + /* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */ + return V_028C70_SWAP_STD_REV; + + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_R11G11B10_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16_UNORM: + return V_028C70_SWAP_STD; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + + /* 128-bit buffers. 
*/ + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_SWAP_STD; + default: + R600_ERR("unsupported colorswap format %d\n", format); + return ~0U; + } + return ~0U; +} + +static uint32_t r600_translate_colorformat(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_COLOR_4_4; + + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_028C70_COLOR_8; + + /* 16-bit buffers. */ + case PIPE_FORMAT_B5G6R5_UNORM: + return V_028C70_COLOR_5_6_5; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_028C70_COLOR_1_5_5_5; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_028C70_COLOR_4_4_4_4; + + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_COLOR_16; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_028C70_COLOR_16; + + case PIPE_FORMAT_R16_FLOAT: + return V_028C70_COLOR_16_FLOAT; + + /* 32-bit buffers. 
*/ + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + return V_028C70_COLOR_8_8_8_8; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_028C70_COLOR_2_10_10_10; + + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_028C70_COLOR_8_24; + + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_COLOR_24_8; + + case PIPE_FORMAT_R32_FLOAT: + return V_028C70_COLOR_32_FLOAT; + + case PIPE_FORMAT_R16G16_FLOAT: + return V_028C70_COLOR_16_16_FLOAT; + + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: + return V_028C70_COLOR_16_16; + + case PIPE_FORMAT_R11G11B10_FLOAT: + return V_028C70_COLOR_10_11_11_FLOAT; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + return V_028C70_COLOR_16_16_16_16; + + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + return V_028C70_COLOR_16_16_16_16_FLOAT; + + case PIPE_FORMAT_R32G32_FLOAT: + return V_028C70_COLOR_32_32_FLOAT; + + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32_SSCALED: + return V_028C70_COLOR_32_32; + + /* 96-bit buffers. */ + case PIPE_FORMAT_R32G32B32_FLOAT: + return V_028C70_COLOR_32_32_32_FLOAT; + + /* 128-bit buffers. 
*/ + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_COLOR_32_32_32_32; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return V_028C70_COLOR_32_32_32_32_FLOAT; + + /* YUV buffers. */ + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: + default: + return ~0U; /* Unsupported. */ + } +} + +static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) +{ + if (R600_BIG_ENDIAN) { + switch(colorformat) { + case V_028C70_COLOR_4_4: + return(ENDIAN_NONE); + + /* 8-bit buffers. */ + case V_028C70_COLOR_8: + return(ENDIAN_NONE); + + /* 16-bit buffers. */ + case V_028C70_COLOR_5_6_5: + case V_028C70_COLOR_1_5_5_5: + case V_028C70_COLOR_4_4_4_4: + case V_028C70_COLOR_16: + case V_028C70_COLOR_8_8: + return(ENDIAN_8IN16); + + /* 32-bit buffers. */ + case V_028C70_COLOR_8_8_8_8: + case V_028C70_COLOR_2_10_10_10: + case V_028C70_COLOR_8_24: + case V_028C70_COLOR_24_8: + case V_028C70_COLOR_32_FLOAT: + case V_028C70_COLOR_16_16_FLOAT: + case V_028C70_COLOR_16_16: + return(ENDIAN_8IN32); + + /* 64-bit buffers. */ + case V_028C70_COLOR_16_16_16_16: + case V_028C70_COLOR_16_16_16_16_FLOAT: + return(ENDIAN_8IN16); + + case V_028C70_COLOR_32_32_FLOAT: + case V_028C70_COLOR_32_32: + return(ENDIAN_8IN32); + + /* 96-bit buffers. */ + case V_028C70_COLOR_32_32_32_FLOAT: + /* 128-bit buffers. */ + case V_028C70_COLOR_32_32_32_32_FLOAT: + case V_028C70_COLOR_32_32_32_32: + return(ENDIAN_8IN32); + default: + return ENDIAN_NONE; /* Unsupported. 
*/ + } + } else { + return ENDIAN_NONE; + } +} + +static bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) +{ + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0U; +} + +static bool r600_is_colorbuffer_format_supported(enum pipe_format format) +{ + return r600_translate_colorformat(format) != ~0U && + r600_translate_colorswap(format) != ~0U; +} + +static bool r600_is_zs_format_supported(enum pipe_format format) +{ + return r600_translate_dbformat(format) != ~0U; +} boolean evergreen_is_format_supported(struct pipe_screen *screen, enum pipe_format format, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e329317b3ab..3085cd9a87a 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -44,7 +44,524 @@ #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" -#include "r600_state_inlines.h" +#include "r600_formats.h" + +static uint32_t r600_translate_blend_function(int blend_func) +{ + switch (blend_func) { + case PIPE_BLEND_ADD: + return V_028804_COMB_DST_PLUS_SRC; + case PIPE_BLEND_SUBTRACT: + return V_028804_COMB_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return V_028804_COMB_DST_MINUS_SRC; + case PIPE_BLEND_MIN: + return V_028804_COMB_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return V_028804_COMB_MAX_DST_SRC; + default: + R600_ERR("Unknown blend function %d\n", blend_func); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_blend_factor(int blend_fact) +{ + switch (blend_fact) { + case PIPE_BLENDFACTOR_ONE: + return V_028804_BLEND_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V_028804_BLEND_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V_028804_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return V_028804_BLEND_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return V_028804_BLEND_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return 
V_028804_BLEND_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return V_028804_BLEND_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V_028804_BLEND_CONST_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return V_028804_BLEND_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V_028804_BLEND_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V_028804_BLEND_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return V_028804_BLEND_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V_028804_BLEND_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V_028804_BLEND_ONE_MINUS_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V_028804_BLEND_ONE_MINUS_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: + return V_028804_BLEND_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return V_028804_BLEND_SRC1_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return V_028804_BLEND_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return V_028804_BLEND_INV_SRC1_ALPHA; + default: + R600_ERR("Bad blend factor %d not supported!\n", blend_fact); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_stencil_op(int s_op) +{ + switch (s_op) { + case PIPE_STENCIL_OP_KEEP: + return V_028800_STENCIL_KEEP; + case PIPE_STENCIL_OP_ZERO: + return V_028800_STENCIL_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return V_028800_STENCIL_REPLACE; + case PIPE_STENCIL_OP_INCR: + return V_028800_STENCIL_INCR; + case PIPE_STENCIL_OP_DECR: + return V_028800_STENCIL_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return V_028800_STENCIL_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return V_028800_STENCIL_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return V_028800_STENCIL_INVERT; + default: + R600_ERR("Unknown stencil op %d", s_op); + assert(0); + break; + } + return 0; +} + +static uint32_t r600_translate_fill(uint32_t func) +{ + switch(func) { + case PIPE_POLYGON_MODE_FILL: + return 2; + case 
PIPE_POLYGON_MODE_LINE: + return 1; + case PIPE_POLYGON_MODE_POINT: + return 0; + default: + assert(0); + return 0; + } +} + +/* translates straight */ +static uint32_t r600_translate_ds_func(int func) +{ + return func; +} + +static unsigned r600_tex_wrap(unsigned wrap) +{ + switch (wrap) { + default: + case PIPE_TEX_WRAP_REPEAT: + return V_03C000_SQ_TEX_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return V_03C000_SQ_TEX_MIRROR; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; + } +} + +static unsigned r600_tex_filter(unsigned filter) +{ + switch (filter) { + default: + case PIPE_TEX_FILTER_NEAREST: + return V_03C000_SQ_TEX_XY_FILTER_POINT; + case PIPE_TEX_FILTER_LINEAR: + return V_03C000_SQ_TEX_XY_FILTER_BILINEAR; + } +} + +static unsigned r600_tex_mipfilter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + return V_03C000_SQ_TEX_Z_FILTER_POINT; + case PIPE_TEX_MIPFILTER_LINEAR: + return V_03C000_SQ_TEX_Z_FILTER_LINEAR; + default: + case PIPE_TEX_MIPFILTER_NONE: + return V_03C000_SQ_TEX_Z_FILTER_NONE; + } +} + +static unsigned r600_tex_compare(unsigned compare) +{ + switch (compare) { + default: + case PIPE_FUNC_NEVER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER; + case PIPE_FUNC_LESS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS; + case PIPE_FUNC_EQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL; + case PIPE_FUNC_LEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; + case PIPE_FUNC_GREATER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER; + case PIPE_FUNC_NOTEQUAL: + return 
V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; + case PIPE_FUNC_ALWAYS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS; + } +} + +static unsigned r600_tex_dim(unsigned dim) +{ + switch (dim) { + default: + case PIPE_TEXTURE_1D: + return V_038000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_038000_SQ_TEX_DIM_1D_ARRAY; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + return V_038000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_038000_SQ_TEX_DIM_2D_ARRAY; + case PIPE_TEXTURE_3D: + return V_038000_SQ_TEX_DIM_3D; + case PIPE_TEXTURE_CUBE: + return V_038000_SQ_TEX_DIM_CUBEMAP; + } +} + +static uint32_t r600_translate_dbformat(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return V_028010_DEPTH_16; + case PIPE_FORMAT_Z24X8_UNORM: + return V_028010_DEPTH_X8_24; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_028010_DEPTH_8_24; + default: + return ~0U; + } +} + +static uint32_t r600_translate_colorswap(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_A8_UNORM: + return V_0280A0_SWAP_ALT_REV; + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_SWAP_ALT; + + /* 16-bit buffers. 
*/ + case PIPE_FORMAT_B5G6R5_UNORM: + return V_0280A0_SWAP_STD_REV; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: + return V_0280A0_SWAP_ALT; + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_FLOAT: + return V_0280A0_SWAP_STD; + + /* 32-bit buffers. */ + + case PIPE_FORMAT_A8B8G8R8_SRGB: + return V_0280A0_SWAP_STD_REV; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + return V_0280A0_SWAP_ALT_REV; + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + /* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */ + return V_0280A0_SWAP_STD_REV; + + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_R11G11B10_FLOAT: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + return V_0280A0_SWAP_STD; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + + /* 128-bit buffers. 
*/ + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_SWAP_STD; + default: + R600_ERR("unsupported colorswap format %d\n", format); + return ~0U; + } + return ~0U; +} + +static uint32_t r600_translate_colorformat(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_COLOR_4_4; + + /* 8-bit buffers. */ + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_0280A0_COLOR_8; + + /* 16-bit buffers. */ + case PIPE_FORMAT_B5G6R5_UNORM: + return V_0280A0_COLOR_5_6_5; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_0280A0_COLOR_1_5_5_5; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_0280A0_COLOR_4_4_4_4; + + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_COLOR_16; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_0280A0_COLOR_16; + + case PIPE_FORMAT_R16_FLOAT: + return V_0280A0_COLOR_16_FLOAT; + + /* 32-bit buffers. 
*/ + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + return V_0280A0_COLOR_8_8_8_8; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_0280A0_COLOR_2_10_10_10; + + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_0280A0_COLOR_8_24; + + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_COLOR_24_8; + + case PIPE_FORMAT_R32_FLOAT: + return V_0280A0_COLOR_32_FLOAT; + + case PIPE_FORMAT_R16G16_FLOAT: + return V_0280A0_COLOR_16_16_FLOAT; + + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: + return V_0280A0_COLOR_16_16; + + case PIPE_FORMAT_R11G11B10_FLOAT: + return V_0280A0_COLOR_10_11_11_FLOAT; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + return V_0280A0_COLOR_16_16_16_16; + + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + return V_0280A0_COLOR_16_16_16_16_FLOAT; + + case PIPE_FORMAT_R32G32_FLOAT: + return V_0280A0_COLOR_32_32_FLOAT; + + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32_SSCALED: + return V_0280A0_COLOR_32_32; + + /* 96-bit buffers. */ + case PIPE_FORMAT_R32G32B32_FLOAT: + return V_0280A0_COLOR_32_32_32_FLOAT; + + /* 128-bit buffers. 
*/ + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return V_0280A0_COLOR_32_32_32_32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_COLOR_32_32_32_32; + + /* YUV buffers. */ + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: + default: + return ~0U; /* Unsupported. */ + } +} + +static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) +{ + if (R600_BIG_ENDIAN) { + switch(colorformat) { + case V_0280A0_COLOR_4_4: + return(ENDIAN_NONE); + + /* 8-bit buffers. */ + case V_0280A0_COLOR_8: + return(ENDIAN_NONE); + + /* 16-bit buffers. */ + case V_0280A0_COLOR_5_6_5: + case V_0280A0_COLOR_1_5_5_5: + case V_0280A0_COLOR_4_4_4_4: + case V_0280A0_COLOR_16: + case V_0280A0_COLOR_8_8: + return(ENDIAN_8IN16); + + /* 32-bit buffers. */ + case V_0280A0_COLOR_8_8_8_8: + case V_0280A0_COLOR_2_10_10_10: + case V_0280A0_COLOR_8_24: + case V_0280A0_COLOR_24_8: + case V_0280A0_COLOR_32_FLOAT: + case V_0280A0_COLOR_16_16_FLOAT: + case V_0280A0_COLOR_16_16: + return(ENDIAN_8IN32); + + /* 64-bit buffers. */ + case V_0280A0_COLOR_16_16_16_16: + case V_0280A0_COLOR_16_16_16_16_FLOAT: + return(ENDIAN_8IN16); + + case V_0280A0_COLOR_32_32_FLOAT: + case V_0280A0_COLOR_32_32: + return(ENDIAN_8IN32); + + /* 128-bit buffers. */ + case V_0280A0_COLOR_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32: + return(ENDIAN_8IN32); + default: + return ENDIAN_NONE; /* Unsupported. 
*/ + } + } else { + return ENDIAN_NONE; + } +} + +static bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) +{ + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0U; +} + +static bool r600_is_colorbuffer_format_supported(enum pipe_format format) +{ + return r600_translate_colorformat(format) != ~0U && + r600_translate_colorswap(format) != ~0U; +} + +static bool r600_is_zs_format_supported(enum pipe_format format) +{ + return r600_translate_dbformat(format) != ~0U; +} boolean r600_is_format_supported(struct pipe_screen *screen, enum pipe_format format, diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h deleted file mode 100644 index e9479bd846c..00000000000 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ /dev/null @@ -1,547 +0,0 @@ -/* - * Copyright 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_STATE_INLINES_H -#define R600_STATE_INLINES_H - -#include "util/u_format.h" -#include "r600d.h" -#include "r600_formats.h" - -static INLINE uint32_t r600_translate_blend_function(int blend_func) -{ - switch (blend_func) { - case PIPE_BLEND_ADD: - return V_028804_COMB_DST_PLUS_SRC; - case PIPE_BLEND_SUBTRACT: - return V_028804_COMB_SRC_MINUS_DST; - case PIPE_BLEND_REVERSE_SUBTRACT: - return V_028804_COMB_DST_MINUS_SRC; - case PIPE_BLEND_MIN: - return V_028804_COMB_MIN_DST_SRC; - case PIPE_BLEND_MAX: - return V_028804_COMB_MAX_DST_SRC; - default: - R600_ERR("Unknown blend function %d\n", blend_func); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_blend_factor(int blend_fact) -{ - switch (blend_fact) { - case PIPE_BLENDFACTOR_ONE: - return V_028804_BLEND_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: - return V_028804_BLEND_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return V_028804_BLEND_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return V_028804_BLEND_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: - return V_028804_BLEND_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return V_028804_BLEND_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return V_028804_BLEND_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return V_028804_BLEND_CONST_ALPHA; - case PIPE_BLENDFACTOR_ZERO: - return V_028804_BLEND_ZERO; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return V_028804_BLEND_ONE_MINUS_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return V_028804_BLEND_ONE_MINUS_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return V_028804_BLEND_ONE_MINUS_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return 
V_028804_BLEND_ONE_MINUS_DST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return V_028804_BLEND_ONE_MINUS_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return V_028804_BLEND_ONE_MINUS_CONST_ALPHA; - case PIPE_BLENDFACTOR_SRC1_COLOR: - return V_028804_BLEND_SRC1_COLOR; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - return V_028804_BLEND_SRC1_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - return V_028804_BLEND_INV_SRC1_COLOR; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return V_028804_BLEND_INV_SRC1_ALPHA; - default: - R600_ERR("Bad blend factor %d not supported!\n", blend_fact); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_stencil_op(int s_op) -{ - switch (s_op) { - case PIPE_STENCIL_OP_KEEP: - return V_028800_STENCIL_KEEP; - case PIPE_STENCIL_OP_ZERO: - return V_028800_STENCIL_ZERO; - case PIPE_STENCIL_OP_REPLACE: - return V_028800_STENCIL_REPLACE; - case PIPE_STENCIL_OP_INCR: - return V_028800_STENCIL_INCR; - case PIPE_STENCIL_OP_DECR: - return V_028800_STENCIL_DECR; - case PIPE_STENCIL_OP_INCR_WRAP: - return V_028800_STENCIL_INCR_WRAP; - case PIPE_STENCIL_OP_DECR_WRAP: - return V_028800_STENCIL_DECR_WRAP; - case PIPE_STENCIL_OP_INVERT: - return V_028800_STENCIL_INVERT; - default: - R600_ERR("Unknown stencil op %d", s_op); - assert(0); - break; - } - return 0; -} - -static INLINE uint32_t r600_translate_fill(uint32_t func) -{ - switch(func) { - case PIPE_POLYGON_MODE_FILL: - return 2; - case PIPE_POLYGON_MODE_LINE: - return 1; - case PIPE_POLYGON_MODE_POINT: - return 0; - default: - assert(0); - return 0; - } -} - -/* translates straight */ -static INLINE uint32_t r600_translate_ds_func(int func) -{ - return func; -} - -static inline unsigned r600_tex_wrap(unsigned wrap) -{ - switch (wrap) { - default: - case PIPE_TEX_WRAP_REPEAT: - return V_03C000_SQ_TEX_WRAP; - case PIPE_TEX_WRAP_CLAMP: - return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; - case 
PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return V_03C000_SQ_TEX_CLAMP_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - return V_03C000_SQ_TEX_MIRROR; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; - } -} - -static inline unsigned r600_tex_filter(unsigned filter) -{ - switch (filter) { - default: - case PIPE_TEX_FILTER_NEAREST: - return V_03C000_SQ_TEX_XY_FILTER_POINT; - case PIPE_TEX_FILTER_LINEAR: - return V_03C000_SQ_TEX_XY_FILTER_BILINEAR; - } -} - -static inline unsigned r600_tex_mipfilter(unsigned filter) -{ - switch (filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - return V_03C000_SQ_TEX_Z_FILTER_POINT; - case PIPE_TEX_MIPFILTER_LINEAR: - return V_03C000_SQ_TEX_Z_FILTER_LINEAR; - default: - case PIPE_TEX_MIPFILTER_NONE: - return V_03C000_SQ_TEX_Z_FILTER_NONE; - } -} - -static inline unsigned r600_tex_compare(unsigned compare) -{ - switch (compare) { - default: - case PIPE_FUNC_NEVER: - return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER; - case PIPE_FUNC_LESS: - return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS; - case PIPE_FUNC_EQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL; - case PIPE_FUNC_LEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; - case PIPE_FUNC_GREATER: - return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER; - case PIPE_FUNC_NOTEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; - case PIPE_FUNC_GEQUAL: - return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; - case PIPE_FUNC_ALWAYS: - return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS; - } -} - -static inline unsigned r600_tex_dim(unsigned dim) -{ - switch (dim) { - default: - case PIPE_TEXTURE_1D: - return V_038000_SQ_TEX_DIM_1D; - case PIPE_TEXTURE_1D_ARRAY: - return V_038000_SQ_TEX_DIM_1D_ARRAY; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - return V_038000_SQ_TEX_DIM_2D; - case PIPE_TEXTURE_2D_ARRAY: 
- return V_038000_SQ_TEX_DIM_2D_ARRAY; - case PIPE_TEXTURE_3D: - return V_038000_SQ_TEX_DIM_3D; - case PIPE_TEXTURE_CUBE: - return V_038000_SQ_TEX_DIM_CUBEMAP; - } -} - -static inline uint32_t r600_translate_dbformat(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - return V_028010_DEPTH_16; - case PIPE_FORMAT_Z24X8_UNORM: - return V_028010_DEPTH_X8_24; - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_028010_DEPTH_8_24; - default: - return ~0; - } -} - -static inline uint32_t r600_translate_colorswap(enum pipe_format format) -{ - switch (format) { - /* 8-bit buffers. */ - case PIPE_FORMAT_A8_UNORM: - return V_0280A0_SWAP_ALT_REV; - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_L8_SRGB: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_L4A4_UNORM: - return V_0280A0_SWAP_ALT; - - /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: - return V_0280A0_SWAP_STD_REV; - - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: - return V_0280A0_SWAP_ALT; - - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - return V_0280A0_SWAP_ALT; - - case PIPE_FORMAT_Z16_UNORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_L8A8_SRGB: - return V_0280A0_SWAP_ALT; - case PIPE_FORMAT_R8G8_UNORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_R16_UNORM: - case PIPE_FORMAT_R16_FLOAT: - return V_0280A0_SWAP_STD; - - /* 32-bit buffers. 
*/ - - case PIPE_FORMAT_A8B8G8R8_SRGB: - return V_0280A0_SWAP_STD_REV; - case PIPE_FORMAT_B8G8R8A8_SRGB: - return V_0280A0_SWAP_ALT; - - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - return V_0280A0_SWAP_ALT; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - return V_0280A0_SWAP_ALT_REV; - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - /* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */ - return V_0280A0_SWAP_STD_REV; - - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_SWAP_STD; - - case PIPE_FORMAT_B10G10R10A2_UNORM: - return V_0280A0_SWAP_ALT; - - case PIPE_FORMAT_R11G11B10_FLOAT: - case PIPE_FORMAT_R16G16_UNORM: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R32_FLOAT: - return V_0280A0_SWAP_STD; - - /* 64-bit buffers. */ - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - - /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_UNORM: - return V_0280A0_SWAP_STD; - default: - R600_ERR("unsupported colorswap format %d\n", format); - return ~0; - } - return ~0; -} - -static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_L4A4_UNORM: - return V_0280A0_COLOR_4_4; - - /* 8-bit buffers. 
*/ - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_L8_SRGB: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: - return V_0280A0_COLOR_8; - - /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: - return V_0280A0_COLOR_5_6_5; - - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: - return V_0280A0_COLOR_1_5_5_5; - - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - return V_0280A0_COLOR_4_4_4_4; - - case PIPE_FORMAT_Z16_UNORM: - return V_0280A0_COLOR_16; - - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_L8A8_SRGB: - case PIPE_FORMAT_R8G8_UNORM: - return V_0280A0_COLOR_8_8; - - case PIPE_FORMAT_R16_UNORM: - return V_0280A0_COLOR_16; - - case PIPE_FORMAT_R16_FLOAT: - return V_0280A0_COLOR_16_FLOAT; - - /* 32-bit buffers. */ - case PIPE_FORMAT_A8B8G8R8_SRGB: - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8SG8SB8UX8U_NORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - return V_0280A0_COLOR_8_8_8_8; - - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_COLOR_2_10_10_10; - - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_0280A0_COLOR_8_24; - - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return V_0280A0_COLOR_24_8; - - case PIPE_FORMAT_R32_FLOAT: - return V_0280A0_COLOR_32_FLOAT; - - case PIPE_FORMAT_R16G16_FLOAT: - return V_0280A0_COLOR_16_16_FLOAT; - - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16_UNORM: - return V_0280A0_COLOR_16_16; - - case 
PIPE_FORMAT_R11G11B10_FLOAT: - return V_0280A0_COLOR_10_11_11_FLOAT; - - /* 64-bit buffers. */ - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - return V_0280A0_COLOR_16_16_16_16; - - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - return V_0280A0_COLOR_16_16_16_16_FLOAT; - - case PIPE_FORMAT_R32G32_FLOAT: - return V_0280A0_COLOR_32_32_FLOAT; - - case PIPE_FORMAT_R32G32_USCALED: - case PIPE_FORMAT_R32G32_SSCALED: - return V_0280A0_COLOR_32_32; - - /* 96-bit buffers. */ - case PIPE_FORMAT_R32G32B32_FLOAT: - return V_0280A0_COLOR_32_32_32_FLOAT; - - /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return V_0280A0_COLOR_32_32_32_32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_UNORM: - return V_0280A0_COLOR_32_32_32_32; - - /* YUV buffers. */ - case PIPE_FORMAT_UYVY: - case PIPE_FORMAT_YUYV: - default: - return ~0; /* Unsupported. */ - } -} - -static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) -{ - if (R600_BIG_ENDIAN) { - switch(colorformat) { - case V_0280A0_COLOR_4_4: - return(ENDIAN_NONE); - - /* 8-bit buffers. */ - case V_0280A0_COLOR_8: - return(ENDIAN_NONE); - - /* 16-bit buffers. */ - case V_0280A0_COLOR_5_6_5: - case V_0280A0_COLOR_1_5_5_5: - case V_0280A0_COLOR_4_4_4_4: - case V_0280A0_COLOR_16: - case V_0280A0_COLOR_8_8: - return(ENDIAN_8IN16); - - /* 32-bit buffers. */ - case V_0280A0_COLOR_8_8_8_8: - case V_0280A0_COLOR_2_10_10_10: - case V_0280A0_COLOR_8_24: - case V_0280A0_COLOR_24_8: - case V_0280A0_COLOR_32_FLOAT: - case V_0280A0_COLOR_16_16_FLOAT: - case V_0280A0_COLOR_16_16: - return(ENDIAN_8IN32); - - /* 64-bit buffers. 
*/ - case V_0280A0_COLOR_16_16_16_16: - case V_0280A0_COLOR_16_16_16_16_FLOAT: - return(ENDIAN_8IN16); - - case V_0280A0_COLOR_32_32_FLOAT: - case V_0280A0_COLOR_32_32: - return(ENDIAN_8IN32); - - /* 128-bit buffers. */ - case V_0280A0_COLOR_32_32_32_FLOAT: - case V_0280A0_COLOR_32_32_32_32_FLOAT: - case V_0280A0_COLOR_32_32_32_32: - return(ENDIAN_8IN32); - default: - return ENDIAN_NONE; /* Unsupported. */ - } - } else { - return ENDIAN_NONE; - } -} - -static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) -{ - return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; -} - -static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) -{ - return r600_translate_colorformat(format) != ~0 && - r600_translate_colorswap(format) != ~0; -} - -static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) -{ - return r600_translate_dbformat(format) != ~0; -} - -#endif From 9bf4c30d73f5066db094fa9e72e6c4de7d6f9596 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Mon, 4 Jul 2011 18:30:39 +0400 Subject: [PATCH 007/113] r600g: fix buffer offset in r600_query_begin --- src/gallium/winsys/r600/drm/r600_hw_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 19dc729d00a..c98a1b15701 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1753,7 +1753,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_WRITE, NULL); if (results) { - memset(results + (query->num_results * 4), 0, ctx->max_db * 4 * 4); + memset(results + query->num_results, 0, ctx->max_db * 4 * 4); for (i = num_backends; i < ctx->max_db; i++) { results[(i * 4)+1] = 0x80000000; From fbe9d4261f94b8a22ae04dccb8201a6762b66d40 Mon 
Sep 17 00:00:00 2001 From: Vadim Girlin Date: Mon, 4 Jul 2011 18:30:40 +0400 Subject: [PATCH 008/113] r600g: reduce flushes for queries --- src/gallium/drivers/r600/r600.h | 3 ++- src/gallium/drivers/r600/r600_blit.c | 2 +- src/gallium/drivers/r600/r600_query.c | 5 +---- src/gallium/winsys/r600/drm/r600_hw_context.c | 13 ++++++++----- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 151e831e5c6..2af4d311f60 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -228,6 +228,7 @@ struct r600_query { #define R600_QUERY_STATE_STARTED (1 << 0) #define R600_QUERY_STATE_ENDED (1 << 1) #define R600_QUERY_STATE_SUSPENDED (1 << 2) +#define R600_QUERY_STATE_FLUSHED (1 << 3) #define R600_CONTEXT_DRAW_PENDING (1 << 0) #define R600_CONTEXT_DST_CACHES_DIRTY (1 << 1) @@ -294,7 +295,7 @@ boolean r600_context_query_result(struct r600_context *ctx, void r600_query_begin(struct r600_context *ctx, struct r600_query *query); void r600_query_end(struct r600_context *ctx, struct r600_query *query); void r600_context_queries_suspend(struct r600_context *ctx); -void r600_context_queries_resume(struct r600_context *ctx); +void r600_context_queries_resume(struct r600_context *ctx, boolean flushed); void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, int flag_wait); void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence, diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 6171d285bb9..35e68b6e222 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -97,7 +97,7 @@ static void r600_blitter_end(struct pipe_context *ctx) rctx->saved_render_cond_mode); rctx->saved_render_cond = NULL; } - r600_context_queries_resume(&rctx->ctx); + r600_context_queries_resume(&rctx->ctx, FALSE); rctx->blit = false; } diff --git 
a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index bedb48b6031..174505c75e9 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -61,10 +61,7 @@ static boolean r600_get_query_result(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_query *rquery = (struct r600_query *)query; - if (rquery->num_results) { - ctx->flush(ctx, NULL); - } - return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); + return r600_context_query_result(&rctx->ctx, rquery, wait, vresult); } static void r600_render_condition(struct pipe_context *ctx, diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index c98a1b15701..e3fb6b18394 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1565,7 +1565,7 @@ void r600_context_flush(struct r600_context *ctx) r600_init_cs(ctx); /* resume queries */ - r600_context_queries_resume(ctx); + r600_context_queries_resume(ctx, TRUE); /* set all valid group as dirty so they get reemited on * next draw command @@ -1741,7 +1741,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) /* if query buffer is full force a flush */ if (query->num_results*4 >= query->buffer_size - 16) { - r600_context_flush(ctx); + if (!(query->state & R600_QUERY_STATE_FLUSHED)) + r600_context_flush(ctx); r600_query_result(ctx, query, TRUE); } @@ -1809,6 +1810,7 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) query->num_results += 4 * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? 
ctx->max_db : 1); query->state ^= R600_QUERY_STATE_STARTED; query->state |= R600_QUERY_STATE_ENDED; + query->state &= ~R600_QUERY_STATE_FLUSHED; ctx->num_query_running--; } @@ -1877,7 +1879,7 @@ boolean r600_context_query_result(struct r600_context *ctx, { uint64_t *result = (uint64_t*)vresult; - if (query->num_results) { + if (query->num_results && !(query->state & R600_QUERY_STATE_FLUSHED)) { r600_context_flush(ctx); } if (!r600_query_result(ctx, query, wait)) @@ -1902,7 +1904,7 @@ void r600_context_queries_suspend(struct r600_context *ctx) } } -void r600_context_queries_resume(struct r600_context *ctx) +void r600_context_queries_resume(struct r600_context *ctx, boolean flushed) { struct r600_query *query; @@ -1910,6 +1912,7 @@ void r600_context_queries_resume(struct r600_context *ctx) if (query->state & R600_QUERY_STATE_SUSPENDED) { r600_query_begin(ctx, query); query->state ^= R600_QUERY_STATE_SUSPENDED; - } + } else if (flushed && query->state==R600_QUERY_STATE_ENDED) + query->state |= R600_QUERY_STATE_FLUSHED; } } From 971e1b743eeaa4f1fc5af4c0a3af78422aaa7b24 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Mon, 4 Jul 2011 18:30:41 +0400 Subject: [PATCH 009/113] r600g: fix bo map usage flags in r600_query_begin --- src/gallium/winsys/r600/drm/r600_hw_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index e3fb6b18394..81e26f61158 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1752,7 +1752,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) u32 *results; int i; - results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_WRITE, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_CPU_WRITE, NULL); if (results) { memset(results + query->num_results, 0, ctx->max_db * 4 * 4); From 
6bde225b8b5791588837295b3b89ac132095a6f7 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Mon, 4 Jul 2011 18:30:42 +0400 Subject: [PATCH 010/113] r600g: fix buffer overflow check in r600_query_begin --- src/gallium/winsys/r600/drm/r600_hw_context.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 81e26f61158..633cd35f7a7 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1725,7 +1725,7 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu void r600_query_begin(struct r600_context *ctx, struct r600_query *query) { - unsigned required_space; + unsigned required_space, required_buffer; int num_backends = r600_get_num_backends(ctx->radeon); /* query request needs 6/8 dwords for begin + 6/8 dwords for end */ @@ -1739,8 +1739,11 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) r600_context_flush(ctx); } + required_buffer = query->num_results + + 4 * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->max_db : 1); + /* if query buffer is full force a flush */ - if (query->num_results*4 >= query->buffer_size - 16) { + if (required_buffer*4 > query->buffer_size) { if (!(query->state & R600_QUERY_STATE_FLUSHED)) r600_context_flush(ctx); r600_query_result(ctx, query, TRUE); From 26e7436ad3e757ed10cb477f76ab3fdce9710923 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Sun, 3 Jul 2011 19:43:19 -0700 Subject: [PATCH 011/113] i915g: Move back to the old method for target format fixup. 
--- src/gallium/drivers/i915/i915_state.c | 2 +- src/gallium/drivers/i915/i915_state_emit.c | 97 +++++++++------------- 2 files changed, 41 insertions(+), 58 deletions(-) diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index f412626955d..2812de1fe80 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -244,7 +244,7 @@ i915_create_sampler_state(struct pipe_context *pipe, /* Shadow: */ - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { cso->state[0] |= (SS2_SHADOW_ENABLE | i915_translate_shadow_compare_func(sampler->compare_func)); diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 39fb13aec7e..4f447962bb9 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -346,97 +346,80 @@ emit_constants(struct i915_context *i915) static const struct { enum pipe_format format; - uint hw_shift_R; - uint hw_shift_G; - uint hw_shift_B; - uint hw_shift_A; + uint hw_swizzle; } fixup_formats[] = { - { PIPE_FORMAT_R8G8B8A8_UNORM, 20, 24, 28, 16 /* BGRA */}, - { PIPE_FORMAT_L8_UNORM, 28, 28, 28, 16 /* RRRA */}, - { PIPE_FORMAT_I8_UNORM, 28, 28, 28, 16 /* RRRA */}, - { PIPE_FORMAT_A8_UNORM, 16, 16, 16, 16 /* AAAA */}, - { PIPE_FORMAT_NONE, 0, 0, 0, 0}, + { PIPE_FORMAT_R8G8B8A8_UNORM, 0x21030000 /* BGRA */}, + { PIPE_FORMAT_L8_UNORM, 0x00030000 /* RRRA */}, + { PIPE_FORMAT_I8_UNORM, 0x00030000 /* RRRA */}, + { PIPE_FORMAT_A8_UNORM, 0x33330000 /* AAAA */}, + { PIPE_FORMAT_NONE, 0x00000000}, }; -static boolean need_fixup(struct pipe_surface* p) +static uint need_target_fixup(struct pipe_surface* p) { enum pipe_format f; - /* if we don't have a surface bound yet, we don't need to fixup the shader */ if (!p) - return FALSE; + return 0; f = p->format; for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++) if 
(fixup_formats[i].format == f) - return TRUE; + return 1; - return FALSE; + return 0; } -static uint fixup_swizzle(enum pipe_format f, uint v) +static uint fixup_swizzle(enum pipe_format f) { - int i; - - for(i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++) + for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++) if (fixup_formats[i].format == f) - break; + return fixup_formats[i].hw_swizzle; - if (fixup_formats[i].format == PIPE_FORMAT_NONE) - return v; - - uint rgba = v & 0xFFFF0000; - - v &= 0xFFFF; - v |= ((rgba >> fixup_formats[i].hw_shift_R) & 0xF) << 28; - v |= ((rgba >> fixup_formats[i].hw_shift_G) & 0xF) << 24; - v |= ((rgba >> fixup_formats[i].hw_shift_B) & 0xF) << 20; - v |= ((rgba >> fixup_formats[i].hw_shift_A) & 0xF) << 16; - - return v; + return 0; } static void validate_program(struct i915_context *i915, unsigned *batch_space) { - *batch_space = i915->fs->program_len; + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + uint additional_size = need_target_fixup(cbuf_surface); + + /* we need more batch space if we want to emulate rgba framebuffers */ + *batch_space = i915->fs->program_len + 3 * additional_size; } static void emit_program(struct i915_context *i915) { struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - boolean need_format_fixup = need_fixup(cbuf_surface); - int i; - int fixup_offset = -1; + uint target_fixup = need_target_fixup(cbuf_surface); + uint i; /* we should always have, at least, a pass-through program */ assert(i915->fs->program_len > 0); - if (need_format_fixup) { - /* Find where we emit the output color */ - for (i = i915->fs->program_len - 3; i>0; i-=3) { - uint instr = i915->fs->program[i]; - if ((instr & (REG_NR_MASK << A0_DEST_TYPE_SHIFT)) == - (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) ) { - /* Found it! 
*/ - fixup_offset = i + 1; - break; - } - } - if (fixup_offset == -1) { - need_format_fixup = FALSE; - debug_printf("couldn't find fixup offset\n"); - } + { + /* first word has the size, we have to adjust that */ + uint size = (i915->fs->program[0]); + size += target_fixup * 3; + OUT_BATCH(size); } - /* emit the program to the hw */ - for (i = 0; i < i915->fs->program_len; i++) { - if (need_format_fixup && (i == fixup_offset) ) { - uint v = fixup_swizzle(cbuf_surface->format, i915->fs->program[i]); - OUT_BATCH(v); - } else - OUT_BATCH(i915->fs->program[i]); + /* output the declarations of the program */ + for (i=1 ; i < i915->fs->program_len; i++) + OUT_BATCH(i915->fs->program[i]); + + /* we emit an additional mov with swizzle to fake RGBA framebuffers */ + if (target_fixup) { + /* mov out_color, out_color.zyxw */ + OUT_BATCH(A0_MOV | + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | + A0_DEST_CHANNEL_ALL | + (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) | + (T_DIFFUSE << A0_SRC0_NR_SHIFT)); + OUT_BATCH(fixup_swizzle(cbuf_surface->format)); + OUT_BATCH(0); } } From cc78eb63cdc0022684ca6816f258ea4492431916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Sun, 3 Jul 2011 19:44:02 -0700 Subject: [PATCH 012/113] i915g: Add comment about DDX/DDY. --- src/gallium/drivers/i915/i915_fpc_translate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 0cbd4f2d748..df606e27e26 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -450,6 +450,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, * * Possible concerns: * + * DDX, DDY -- return 0 * SIN, COS -- could use another taylor step? * LIT -- results seem a little different to sw mesa * LOG -- different to mesa on negative numbers, but this is conformant. 
From 2bc5e0e97ba7b6c32f6ff90cb90448173d74b89b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Mon, 4 Jul 2011 20:21:37 -0700 Subject: [PATCH 013/113] i915g: introduce the tiny shader optimizer. --- src/gallium/drivers/i915/Makefile | 1 + src/gallium/drivers/i915/SConscript | 1 + src/gallium/drivers/i915/i915_fpc.h | 88 ++++++++ src/gallium/drivers/i915/i915_fpc_emit.c | 1 - src/gallium/drivers/i915/i915_fpc_optimize.c | 182 +++++++++++++++ src/gallium/drivers/i915/i915_fpc_translate.c | 207 +++++++++--------- 6 files changed, 378 insertions(+), 102 deletions(-) create mode 100644 src/gallium/drivers/i915/i915_fpc_optimize.c diff --git a/src/gallium/drivers/i915/Makefile b/src/gallium/drivers/i915/Makefile index 778124728bb..36197fbc93b 100644 --- a/src/gallium/drivers/i915/Makefile +++ b/src/gallium/drivers/i915/Makefile @@ -27,6 +27,7 @@ C_SOURCES = \ i915_resource_buffer.c \ i915_fpc_emit.c \ i915_fpc_translate.c \ + i915_fpc_optimize.c \ i915_surface.c include ../../Makefile.template diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript index 98370601b7f..76f597001fe 100644 --- a/src/gallium/drivers/i915/SConscript +++ b/src/gallium/drivers/i915/SConscript @@ -14,6 +14,7 @@ i915 = env.ConvenienceLibrary( 'i915_flush.c', 'i915_fpc_emit.c', 'i915_fpc_translate.c', + 'i915_fpc_optimize.c', 'i915_prim_emit.c', 'i915_prim_vbuf.c', 'i915_query.c', diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index 509395cf1f5..b760bc461a1 100644 --- a/src/gallium/drivers/i915/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -33,7 +33,9 @@ #include "i915_context.h" #include "i915_reg.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" #define I915_PROGRAM_SIZE 192 @@ -207,4 +209,90 @@ extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...); +/*====================================================================== + * i915_fpc_optimize.c + 
*/ + + +struct i915_src_register +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned SwizzleX : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleY : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleZ : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleW : 3; /* TGSI_SWIZZLE_ */ + unsigned Absolute : 1; /* BOOL */ + unsigned Negate : 1; /* BOOL */ +}; + +/* Additional swizzle supported in i915 */ +#define TGSI_SWIZZLE_ZERO 4 +#define TGSI_SWIZZLE_ONE 5 + +struct i915_dst_register +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned Padding : 6; +}; + + +struct i915_full_dst_register +{ + struct i915_dst_register Register; +/* + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; +*/ +}; + +struct i915_full_src_register +{ + struct i915_src_register Register; +/* + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; +*/ +}; + +struct i915_full_instruction +{ + struct tgsi_instruction Instruction; +/* + struct tgsi_instruction_predicate Predicate; + struct tgsi_instruction_label Label; +*/ + struct tgsi_instruction_texture Texture; + struct i915_full_dst_register Dst[1]; + struct i915_full_src_register Src[3]; +}; + + +union i915_full_token +{ + struct tgsi_token Token; + struct tgsi_full_declaration FullDeclaration; + struct tgsi_full_immediate FullImmediate; + struct i915_full_instruction FullInstruction; + struct tgsi_full_property FullProperty; +}; + +struct i915_token_list +{ + union i915_full_token* Tokens; + unsigned NumTokens; +}; + +extern struct i915_token_list* i915_optimize(const struct tgsi_token *tokens); + +extern void i915_optimize_free(struct i915_token_list* tokens); + #endif diff --git 
a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c index d28595e0fd3..c4a42df7882 100644 --- a/src/gallium/drivers/i915/i915_fpc_emit.c +++ b/src/gallium/drivers/i915/i915_fpc_emit.c @@ -369,7 +369,6 @@ i915_emit_const4f(struct i915_fp_compile * p, // XXX emit swizzle here for 0, 1, -1 and any combination thereof // we can use swizzle + neg for that - printf("const %f %f %f %f\n",c0,c1,c2,c3); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 && diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c new file mode 100644 index 00000000000..5c60d952de2 --- /dev/null +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -0,0 +1,182 @@ +/************************************************************************** + * + * Copyright 2011 The Chromium OS authors. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2) +{ + return (d1->Register.File == d2->Register.File && + d1->Register.Indirect == d2->Register.Indirect && + d1->Register.Dimension == d2->Register.Dimension && + d1->Register.Index == d2->Register.Index); +} + +static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2) +{ + return (d1->Register.File == d2->Register.File && + d1->Register.Indirect == d2->Register.Indirect && + d1->Register.Dimension == d2->Register.Dimension && + d1->Register.Index == d2->Register.Index && + d1->Register.Absolute == d2->Register.Absolute && + d1->Register.Negate == d2->Register.Negate); +} + + +/* + * Optimize away things like: + * MUL OUT[0].xyz, TEMP[1], TEMP[2] + * MOV OUT[0].w, TEMP[2] + * into: + * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] + * This is useful for optimizing texenv. 
+ */ +static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, union i915_full_token* next) +{ + if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL && + next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && + next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && + same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && + same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) ) + { + next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; + current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_ONE; + return; + } + + if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL && + next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && + next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && + same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && + same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) ) + { + next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; + current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_ONE; + return; + } +} + +static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) +{ + o->File = i->File; + o->Indirect = i->Indirect; + o->Dimension = i->Dimension; + o->Index = i->Index; + o->SwizzleX = i->SwizzleX; + o->SwizzleY = i->SwizzleY; + o->SwizzleZ = i->SwizzleZ; + o->SwizzleW = i->SwizzleW; +
o->Absolute = i->Absolute; + o->Negate = i->Negate; +} + +static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i) +{ + o->File = i->File; + o->WriteMask = i->WriteMask; + o->Indirect = i->Indirect; + o->Dimension = i->Dimension; + o->Index = i->Index; +} + +static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i) +{ + memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); + memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); + + copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); + + copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); + copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); + copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); +} + +static void copy_token(union i915_full_token* o, union tgsi_full_token* i) +{ + if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) + memcpy(o, i, sizeof(*o)); + else + copy_instruction(&o->FullInstruction, &i->FullInstruction); + +} + +struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) +{ + struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); + out_tokens->NumTokens = 0; + struct tgsi_parse_context parse; + int i = 0; + + /* Count the tokens */ + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + out_tokens->NumTokens++; + } + tgsi_parse_free (&parse); + + /* Allocate our tokens */ + out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens); + + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + copy_token(&out_tokens->Tokens[i] , &parse.FullToken); + + if (i > 0) + i915_fpc_optimize_mov_after_mul(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); + + i++; + } + tgsi_parse_free (&parse); + + return out_tokens; +} + +void i915_optimize_free(struct i915_token_list* tokens) +{ + free(tokens->Tokens); + free(tokens); +} + + diff 
--git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index df606e27e26..e19d9be04de 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -172,7 +172,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit) */ static uint src_vector(struct i915_fp_compile *p, - const struct tgsi_full_src_register *source, + const struct i915_full_src_register *source, struct i915_fragment_shader* fs) { uint index = source->Register.Index; @@ -287,7 +287,7 @@ src_vector(struct i915_fp_compile *p, */ static uint get_result_vector(struct i915_fp_compile *p, - const struct tgsi_full_dst_register *dest) + const struct i915_full_dst_register *dest) { switch (dest->Register.File) { case TGSI_FILE_OUTPUT: @@ -316,7 +316,7 @@ get_result_vector(struct i915_fp_compile *p, * Compute flags for saturation and writemask. */ static uint -get_result_flags(const struct tgsi_full_instruction *inst) +get_result_flags(const struct i915_full_instruction *inst) { const uint writeMask = inst->Dst[0].Register.WriteMask; @@ -378,7 +378,7 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex) */ static void emit_tex(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, struct i915_fragment_shader* fs) { @@ -404,7 +404,7 @@ emit_tex(struct i915_fp_compile *p, */ static void emit_simple_arith(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, uint numArgs, struct i915_fragment_shader* fs) { @@ -429,11 +429,11 @@ emit_simple_arith(struct i915_fp_compile *p, /** As above, but swap the first two src regs */ static void emit_simple_arith_swap2(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, uint numArgs, struct i915_fragment_shader* fs) { - struct 
tgsi_full_instruction inst2; + struct i915_full_instruction inst2; assert(numArgs == 2); @@ -457,7 +457,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, */ static void i915_translate_instruction(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, struct i915_fragment_shader *fs) { uint writemask; @@ -728,6 +728,9 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_MUL, 2, fs); break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_POW: src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); @@ -1044,6 +1047,93 @@ i915_translate_instruction(struct i915_fp_compile *p, } +static void i915_translate_token(struct i915_fp_compile *p, + const union i915_full_token* token, + struct i915_fragment_shader *fs) +{ + struct i915_fragment_shader *ifs = p->shader; + switch( token->Token.Type ) { + case TGSI_TOKEN_TYPE_PROPERTY: + /* + * We only support one cbuf, but we still need to ignore the property + * correctly so we don't hit the assert at the end of the switch case. 
+ */ + assert(token->FullProperty.Property.PropertyName == + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + if (token->FullDeclaration.Declaration.File + == TGSI_FILE_CONSTANT) { + uint i; + for (i = token->FullDeclaration.Range.First; + i <= token->FullDeclaration.Range.Last; + i++) { + assert(ifs->constant_flags[i] == 0x0); + ifs->constant_flags[i] = I915_CONSTFLAG_USER; + ifs->num_constants = MAX2(ifs->num_constants, i + 1); + } + } + else if (token->FullDeclaration.Declaration.File + == TGSI_FILE_TEMPORARY) { + uint i; + for (i = token->FullDeclaration.Range.First; + i <= token->FullDeclaration.Range.Last; + i++) { + if (i >= I915_MAX_TEMPORARY) + debug_printf("Too many temps (%d)\n",i); + else + /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ + p->temp_flag |= (1 << i); /* mark temp as used */ + } + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm + = &token->FullImmediate; + const uint pos = p->num_immediates++; + uint j; + assert( imm->Immediate.NrTokens <= 4 + 1 ); + for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { + p->immediates[pos][j] = imm->u[j].Float; + } + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (p->first_instruction) { + /* resolve location of immediates */ + uint i, j; + for (i = 0; i < p->num_immediates; i++) { + /* find constant slot for this immediate */ + for (j = 0; j < I915_MAX_CONSTANT; j++) { + if (ifs->constant_flags[j] == 0x0) { + memcpy(ifs->constants[j], + p->immediates[i], + 4 * sizeof(float)); + /*printf("immediate %d maps to const %d\n", i, j);*/ + ifs->constant_flags[j] = 0xf; /* all four comps used */ + p->immediates_map[i] = j; + ifs->num_constants = MAX2(ifs->num_constants, j + 1); + break; + } + } + } + + p->first_instruction = FALSE; + } + + i915_translate_instruction(p, &token->FullInstruction, fs); + break; + + default: + assert( 0 ); + } + +} + /** * Translate TGSI fragment shader into i915 hardware 
instructions. * \param p the translation state @@ -1051,100 +1141,13 @@ i915_translate_instruction(struct i915_fp_compile *p, */ static void i915_translate_instructions(struct i915_fp_compile *p, - const struct tgsi_token *tokens, + const struct i915_token_list *tokens, struct i915_fragment_shader *fs) { - struct i915_fragment_shader *ifs = p->shader; - struct tgsi_parse_context parse; - - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_PROPERTY: - /* - * We only support one cbuf, but we still need to ignore the property - * correctly so we don't hit the assert at the end of the switch case. - */ - assert(parse.FullToken.FullProperty.Property.PropertyName == - TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); - break; - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_CONSTANT) { - uint i; - for (i = parse.FullToken.FullDeclaration.Range.First; - i <= parse.FullToken.FullDeclaration.Range.Last; - i++) { - assert(ifs->constant_flags[i] == 0x0); - ifs->constant_flags[i] = I915_CONSTFLAG_USER; - ifs->num_constants = MAX2(ifs->num_constants, i + 1); - } - } - else if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_TEMPORARY) { - uint i; - for (i = parse.FullToken.FullDeclaration.Range.First; - i <= parse.FullToken.FullDeclaration.Range.Last; - i++) { - if (i >= I915_MAX_TEMPORARY) - debug_printf("Too many temps (%d)\n",i); - else - /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ - p->temp_flag |= (1 << i); /* mark temp as used */ - } - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm - = &parse.FullToken.FullImmediate; - const uint pos = p->num_immediates++; - uint j; - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { - p->immediates[pos][j] = imm->u[j].Float; - } - 
} - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (p->first_instruction) { - /* resolve location of immediates */ - uint i, j; - for (i = 0; i < p->num_immediates; i++) { - /* find constant slot for this immediate */ - for (j = 0; j < I915_MAX_CONSTANT; j++) { - if (ifs->constant_flags[j] == 0x0) { - memcpy(ifs->constants[j], - p->immediates[i], - 4 * sizeof(float)); - /*printf("immediate %d maps to const %d\n", i, j);*/ - ifs->constant_flags[j] = 0xf; /* all four comps used */ - p->immediates_map[i] = j; - ifs->num_constants = MAX2(ifs->num_constants, j + 1); - break; - } - } - } - - p->first_instruction = FALSE; - } - - i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs); - break; - - default: - assert( 0 ); - } - - } /* while */ - - tgsi_parse_free (&parse); + int i; + for(i = 0; i<tokens->NumTokens; i++) { + i915_translate_token(p, &tokens->Tokens[i], fs); + } } @@ -1303,8 +1306,10 @@ i915_translate_fragment_program( struct i915_context *i915, p = i915_init_compile(i915, fs); - i915_translate_instructions(p, tokens, fs); + struct i915_token_list* i_tokens = i915_optimize(tokens); + i915_translate_instructions(p, i_tokens, fs); i915_fixup_depth_write(p); i915_fini_compile(i915, p); + i915_optimize_free(i_tokens); } From e53b41f2632af26c79c4f7ed5b0fdb86a1feb602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Wed, 6 Jul 2011 02:19:48 -0700 Subject: [PATCH 014/113] i915g: Improve flushing using heuristics. 
--- src/gallium/drivers/i915/i915_batch.h | 18 +++++++++++++++++- src/gallium/drivers/i915/i915_clear.c | 5 +++++ src/gallium/drivers/i915/i915_context.h | 2 ++ src/gallium/drivers/i915/i915_flush.c | 1 + src/gallium/drivers/i915/i915_prim_emit.c | 2 ++ src/gallium/drivers/i915/i915_prim_vbuf.c | 3 +++ src/gallium/drivers/i915/i915_winsys.h | 6 ++++++ src/gallium/winsys/i915/drm/i915_drm_buffer.c | 10 ++++++++++ 8 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index ce2691b2fd7..a1f8bcae802 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -29,6 +29,7 @@ #define I915_BATCH_H #include "i915_batchbuffer.h" +#include "i915_context.h" #define BEGIN_BATCH(dwords) \ @@ -49,11 +50,26 @@ #define FLUSH_BATCH(fence) \ i915_flush(i915, fence) - /************************************************************************ * i915_flush.c */ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence); +/* + * Flush if the current color buf is idle and we have more than 256 vertices + * queued, or if the current color buf is busy and we have more than 4096 + * vertices queued. 
+ */ +static INLINE void i915_flush_heuristically(struct i915_context* i915, + int num_vertex) +{ + struct i915_winsys *iws = i915->iws; + i915->vertices_since_last_flush += num_vertex; + if ( i915->vertices_since_last_flush > 4096 + || ( i915->vertices_since_last_flush > 256 && + !iws->buffer_is_busy(iws, i915->current.cbuf_bo)) ) + FLUSH_BATCH(NULL); +} + #endif diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index fcb208d6dae..e1d6a749cdc 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -120,6 +120,11 @@ i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, OUT_BATCH_F(desty + height); OUT_BATCH_F(destx); OUT_BATCH_F(desty); + + /* Flush after clear, its expected to be a costly operation. + * This is not required, just a heuristic + */ + FLUSH_BATCH(NULL); } /** diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index c964208fedd..84862351ffe 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -264,6 +264,8 @@ struct i915_context { struct util_slab_mempool transfer_pool; struct util_slab_mempool texture_transfer_pool; + int vertices_since_last_flush; + /** blitter/hw-clear */ struct blitter_context* blitter; diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index b4e81147c4f..6d76afa9dbc 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -77,4 +77,5 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) i915->static_dirty = ~0; /* kernel emits flushes in between batchbuffers */ i915->flush_dirty = 0; + i915->vertices_since_last_flush = 0; } diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c index 85656cd7846..1acde97d4bd 100644 --- a/src/gallium/drivers/i915/i915_prim_emit.c +++ 
b/src/gallium/drivers/i915/i915_prim_emit.c @@ -166,6 +166,8 @@ emit_prim( struct draw_stage *stage, for (i = 0; i < nr; i++) emit_hw_vertex(i915, prim->v[i]); + + i915_flush_heuristically(i915, nr); } diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index 79db3b650eb..d8ae1de2963 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -487,6 +487,7 @@ draw_arrays_fallback(struct vbuf_render *render, draw_arrays_generate_indices(render, start, nr, i915_render->fallback); + i915_flush_heuristically(i915, nr_indices); out: return; } @@ -534,6 +535,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, nr); OUT_BATCH(start); /* Beginning vertex index */ + i915_flush_heuristically(i915, nr); out: return; } @@ -657,6 +659,7 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, save_nr_indices, i915_render->fallback); + i915_flush_heuristically(i915, nr_indices); out: return; } diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 21cfdc9613e..20438609e07 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -207,6 +207,12 @@ struct i915_winsys { void (*buffer_destroy)(struct i915_winsys *iws, struct i915_winsys_buffer *buffer); + + /** + * Check if a buffer is busy. 
+ */ + boolean (*buffer_is_busy)(struct i915_winsys *iws, + struct i915_winsys_buffer *buffer); /*@}*/ diff --git a/src/gallium/winsys/i915/drm/i915_drm_buffer.c b/src/gallium/winsys/i915/drm/i915_drm_buffer.c index 01dd4bf062f..89d8e89e6a7 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_buffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_buffer.c @@ -213,6 +213,15 @@ i915_drm_buffer_destroy(struct i915_winsys *iws, FREE(buffer); } +static boolean +i915_drm_buffer_is_busy(struct i915_winsys *iws, + struct i915_winsys_buffer *buffer) +{ + struct i915_drm_buffer* i915_buffer = i915_drm_buffer(buffer); + return drm_intel_bo_busy(i915_buffer->bo); +} + + void i915_drm_winsys_init_buffer_functions(struct i915_drm_winsys *idws) { @@ -224,4 +233,5 @@ i915_drm_winsys_init_buffer_functions(struct i915_drm_winsys *idws) idws->base.buffer_unmap = i915_drm_buffer_unmap; idws->base.buffer_write = i915_drm_buffer_write; idws->base.buffer_destroy = i915_drm_buffer_destroy; + idws->base.buffer_is_busy = i915_drm_buffer_is_busy; } From f747d03b1d3aa4e63417bd8486909f63b4a33be4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=B6lgyes=20D=C3=A1vid?= Date: Wed, 6 Jul 2011 10:10:20 -0400 Subject: [PATCH 015/113] Fixes for leaks reported by cppcheck. 
--- src/egl/drivers/dri2/egl_dri2.c | 8 ++++++-- src/egl/drivers/dri2/platform_x11.c | 1 + src/egl/wayland/wayland-drm/wayland-drm.c | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 5680c360f1d..35a598ecab8 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -923,8 +923,10 @@ dri2_create_image_khr_renderbuffer(_EGLDisplay *disp, _EGLContext *ctx, return EGL_NO_IMAGE_KHR; } - if (!_eglInitImage(&dri2_img->base, disp)) + if (!_eglInitImage(&dri2_img->base, disp)) { + free(dri2_img); return EGL_NO_IMAGE_KHR; + } dri2_img->dri_image = dri2_dpy->image->createImageFromRenderbuffer(dri2_ctx->dri_context, @@ -1335,8 +1337,10 @@ _EGL_MAIN(const char *args) memset(dri2_drv, 0, sizeof *dri2_drv); - if (!dri2_load(&dri2_drv->base)) + if (!dri2_load(&dri2_drv->base)) { + free(dri2_drv); return NULL; + } _eglInitDriverFallbacks(&dri2_drv->base); dri2_drv->base.API.Initialize = dri2_initialize; diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index 4e00c958cbd..f27bf176fb6 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -845,6 +845,7 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx, if (!_eglInitImage(&dri2_img->base, disp)) { free(buffers_reply); free(geometry_reply); + free(dri2_img); return EGL_NO_IMAGE_KHR; } diff --git a/src/egl/wayland/wayland-drm/wayland-drm.c b/src/egl/wayland/wayland-drm/wayland-drm.c index 3023cd02b07..ec96c045143 100644 --- a/src/egl/wayland/wayland-drm/wayland-drm.c +++ b/src/egl/wayland/wayland-drm/wayland-drm.c @@ -104,6 +104,7 @@ drm_create_buffer(struct wl_client *client, struct wl_drm *drm, wl_client_post_error(client, &drm->object, WL_DRM_ERROR_INVALID_VISUAL, "invalid visual"); + free(buffer); return; } From eafc74d7d4982a835ac43c73963dda9982652464 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 29 Jun 2011 
16:47:30 -0700 Subject: [PATCH 016/113] i965/fs: Fix message register allocation in FB writes. Commit 6750226e6d915742ebf96bae2cfcdd287b85db35 bumped the base MRF to m2 instead of m0, but failed to adjust inst->mlen, which was being set to the highest MRF. Subtracting the base MRF solves the issue. Signed-off-by: Kenneth Graunke Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 9091014976b..cbe5cf428c5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1791,7 +1791,8 @@ fs_visitor::emit_fb_writes() { this->current_annotation = "FB write header"; GLboolean header_present = GL_TRUE; - int nr = 2; + int base_mrf = 2; + int nr = base_mrf; int reg_width = c->dispatch_width / 8; if (intel->gen >= 6 && @@ -1870,8 +1871,8 @@ fs_visitor::emit_fb_writes() fs_inst *inst = emit(FS_OPCODE_FB_WRITE); inst->target = target; - inst->base_mrf = 2; - inst->mlen = nr; + inst->base_mrf = base_mrf; + inst->mlen = nr - base_mrf; if (target == c->key.nr_color_regions - 1) inst->eot = true; inst->header_present = header_present; @@ -1888,8 +1889,8 @@ fs_visitor::emit_fb_writes() } fs_inst *inst = emit(FS_OPCODE_FB_WRITE); - inst->base_mrf = 2; - inst->mlen = nr; + inst->base_mrf = base_mrf; + inst->mlen = nr - base_mrf; inst->eot = true; inst->header_present = header_present; } From abbbd14dd440cfbbe8b42279cf95c30eec5b495d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 5 Jul 2011 21:59:33 -0700 Subject: [PATCH 017/113] glsl: Fix make clean for dricore. 
--- src/glsl/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/Makefile b/src/glsl/Makefile index 4100414a37d..e0776c1b55d 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -154,7 +154,7 @@ clean: clean-dricore -rm -f $(APPS) clean-dricore: - -rm -f $(DRICORE_OBJ_DIR) $(TOP)/$(LIB_DIR)/libglsl.so libglsl.so + -rm -f $(OBJECTS_DRICORE) $(TOP)/$(LIB_DIR)/libglsl.so libglsl.so ifneq (,$(DRICORE_GLSL_LIBS)) DRICORE_INSTALL_TARGET = install-dricore From b043409adfa6ffa6dc78331258de52f7fa6d59aa Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 5 Jul 2011 22:02:33 -0700 Subject: [PATCH 018/113] glsl: Suppress warning from matching_signature change. gcc isn't smart enough to see that we only look at matched_score after we've initialized it (because match != NULL happens at the same time) --- src/glsl/ir_function.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index ef8d4fcfcd4..1255072a571 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -165,7 +165,7 @@ ir_function_signature * ir_function::matching_signature(const exec_list *actual_parameters) { ir_function_signature *match = NULL; - int matched_score; + int matched_score = 0; foreach_iter(exec_list_iterator, iter, signatures) { ir_function_signature *const sig = From e9d563e3ffed8eadde41c8cb25eaa42e20e9688f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 28 Jun 2011 13:55:44 -0700 Subject: [PATCH 019/113] mesa: Don't skip glGetProgramEnvParam4dvARB if there was already an error. Fixes a bug caught by oglconform, and now piglit ARB_vertex_program/getenv4d-with-error. The wrapping of an existing GL function made it so that we couldn't distinguish an error in looking up our arguments from an existing error. Instead, make a helper function to choose the param, and use it from multiple callers. v2: Move the success case line into the conditional, use COPY_4V more. 
--- src/mesa/main/arbprogram.c | 112 +++++++++++++++---------------------- 1 file changed, 46 insertions(+), 66 deletions(-) diff --git a/src/mesa/main/arbprogram.c b/src/mesa/main/arbprogram.c index 26d781954ed..a45ca1b8665 100644 --- a/src/mesa/main/arbprogram.c +++ b/src/mesa/main/arbprogram.c @@ -269,6 +269,33 @@ _mesa_IsProgramARB(GLuint id) return GL_FALSE; } +static GLboolean +get_env_param_pointer(struct gl_context *ctx, const char *func, + GLenum target, GLuint index, GLfloat **param) +{ + if (target == GL_FRAGMENT_PROGRAM_ARB + && ctx->Extensions.ARB_fragment_program) { + if (index >= ctx->Const.FragmentProgram.MaxEnvParams) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(index)", func); + return GL_FALSE; + } + *param = ctx->FragmentProgram.Parameters[index]; + return GL_TRUE; + } + else if (target == GL_VERTEX_PROGRAM_ARB && + (ctx->Extensions.ARB_vertex_program || + ctx->Extensions.NV_vertex_program)) { + if (index >= ctx->Const.VertexProgram.MaxEnvParams) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(index)", func); + return GL_FALSE; + } + *param = ctx->VertexProgram.Parameters[index]; + return GL_TRUE; + } else { + _mesa_error(ctx, GL_INVALID_ENUM, "%s(target)", func); + return GL_FALSE; + } +} void GLAPIENTRY _mesa_ProgramStringARB(GLenum target, GLenum format, GLsizei len, @@ -383,30 +410,16 @@ void GLAPIENTRY _mesa_ProgramEnvParameter4fARB(GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w) { + GLfloat *param; + GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); - if (target == GL_FRAGMENT_PROGRAM_ARB - && ctx->Extensions.ARB_fragment_program) { - if (index >= ctx->Const.FragmentProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameter(index)"); - return; - } - ASSIGN_4V(ctx->FragmentProgram.Parameters[index], x, y, z, w); - } - else if (target == GL_VERTEX_PROGRAM_ARB /* == GL_VERTEX_PROGRAM_NV */ - && (ctx->Extensions.ARB_vertex_program || 
ctx->Extensions.NV_vertex_program)) { - if (index >= ctx->Const.VertexProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameter(index)"); - return; - } - ASSIGN_4V(ctx->VertexProgram.Parameters[index], x, y, z, w); - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glProgramEnvParameter(target)"); - return; + if (get_env_param_pointer(ctx, "glProgramEnvParameter", + target, index, &param)) { + ASSIGN_4V(param, x, y, z, w); } } @@ -422,32 +435,16 @@ void GLAPIENTRY _mesa_ProgramEnvParameter4fvARB(GLenum target, GLuint index, const GLfloat *params) { + GLfloat *param; + GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); - if (target == GL_FRAGMENT_PROGRAM_ARB - && ctx->Extensions.ARB_fragment_program) { - if (index >= ctx->Const.FragmentProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameter4fv(index)"); - return; - } - memcpy(ctx->FragmentProgram.Parameters[index], params, - 4 * sizeof(GLfloat)); - } - else if (target == GL_VERTEX_PROGRAM_ARB /* == GL_VERTEX_PROGRAM_NV */ - && (ctx->Extensions.ARB_vertex_program || ctx->Extensions.NV_vertex_program)) { - if (index >= ctx->Const.VertexProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameter4fv(index)"); - return; - } - memcpy(ctx->VertexProgram.Parameters[index], params, - 4 * sizeof(GLfloat)); - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glProgramEnvParameter4fv(target)"); - return; + if (get_env_param_pointer(ctx, "glProgramEnvParameter4fv", + target, index, &param)) { + memcpy(param, params, 4 * sizeof(GLfloat)); } } @@ -496,14 +493,11 @@ _mesa_GetProgramEnvParameterdvARB(GLenum target, GLuint index, GLdouble *params) { GET_CURRENT_CONTEXT(ctx); - GLfloat fparams[4]; + GLfloat *fparam; - _mesa_GetProgramEnvParameterfvARB(target, index, fparams); - if (ctx->ErrorValue == GL_NO_ERROR) { - params[0] = fparams[0]; - params[1] = fparams[1]; - params[2] = fparams[2]; - params[3] = fparams[3]; + 
if (get_env_param_pointer(ctx, "glGetProgramEnvParameterdv", + target, index, &fparam)) { + COPY_4V(params, fparam); } } @@ -512,29 +506,15 @@ void GLAPIENTRY _mesa_GetProgramEnvParameterfvARB(GLenum target, GLuint index, GLfloat *params) { + GLfloat *param; + GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - if (target == GL_FRAGMENT_PROGRAM_ARB - && ctx->Extensions.ARB_fragment_program) { - if (index >= ctx->Const.FragmentProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramEnvParameter(index)"); - return; - } - COPY_4V(params, ctx->FragmentProgram.Parameters[index]); - } - else if (target == GL_VERTEX_PROGRAM_ARB - && ctx->Extensions.ARB_vertex_program) { - if (index >= ctx->Const.VertexProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramEnvParameter(index)"); - return; - } - COPY_4V(params, ctx->VertexProgram.Parameters[index]); - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramEnvParameter(target)"); - return; + if (get_env_param_pointer(ctx, "glGetProgramEnvParameterfv", + target, index, &param)) { + COPY_4V(params, param); } } From c9aac11713c6238a8e3a89e8501e6e686fa811a7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 28 Jun 2011 15:04:22 -0700 Subject: [PATCH 020/113] mesa: Don't skip glGetProgramLocalParam4dvARB if there was already an error. Like the previous commit, but fixes ARB_vertex_program/getlocal4d-with-error. v2: Move the success case line into the conditional, use ASSIGN_4V more. 
--- src/mesa/main/arbprogram.c | 117 +++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 64 deletions(-) diff --git a/src/mesa/main/arbprogram.c b/src/mesa/main/arbprogram.c index a45ca1b8665..b83369d9e04 100644 --- a/src/mesa/main/arbprogram.c +++ b/src/mesa/main/arbprogram.c @@ -269,6 +269,44 @@ _mesa_IsProgramARB(GLuint id) return GL_FALSE; } +static GLboolean +get_local_param_pointer(struct gl_context *ctx, const char *func, + GLenum target, GLuint index, GLfloat **param) +{ + struct gl_program *prog; + GLuint maxParams; + + if (target == GL_VERTEX_PROGRAM_ARB + && ctx->Extensions.ARB_vertex_program) { + prog = &(ctx->VertexProgram.Current->Base); + maxParams = ctx->Const.VertexProgram.MaxLocalParams; + } + else if (target == GL_FRAGMENT_PROGRAM_ARB + && ctx->Extensions.ARB_fragment_program) { + prog = &(ctx->FragmentProgram.Current->Base); + maxParams = ctx->Const.FragmentProgram.MaxLocalParams; + } + else if (target == GL_FRAGMENT_PROGRAM_NV + && ctx->Extensions.NV_fragment_program) { + prog = &(ctx->FragmentProgram.Current->Base); + maxParams = MAX_NV_FRAGMENT_PROGRAM_PARAMS; + } + else { + _mesa_error(ctx, GL_INVALID_ENUM, + "%s(target)", func); + return GL_FALSE; + } + + if (index >= maxParams) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(index)", func); + return GL_FALSE; + } + + *param = prog->LocalParams[index]; + return GL_TRUE; +} + + static GLboolean get_env_param_pointer(struct gl_context *ctx, const char *func, GLenum target, GLuint index, GLfloat **param) @@ -527,39 +565,16 @@ _mesa_ProgramLocalParameter4fARB(GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w) { GET_CURRENT_CONTEXT(ctx); - struct gl_program *prog; + GLfloat *param; ASSERT_OUTSIDE_BEGIN_END(ctx); FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); - if ((target == GL_FRAGMENT_PROGRAM_NV - && ctx->Extensions.NV_fragment_program) || - (target == GL_FRAGMENT_PROGRAM_ARB - && ctx->Extensions.ARB_fragment_program)) { - if (index >= 
ctx->Const.FragmentProgram.MaxLocalParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramLocalParameterARB"); - return; - } - prog = &(ctx->FragmentProgram.Current->Base); + if (get_local_param_pointer(ctx, "glProgramLocalParameterARB", + target, index, &param)) { + ASSERT(index < MAX_PROGRAM_LOCAL_PARAMS); + ASSIGN_4V(param, x, y, z, w); } - else if (target == GL_VERTEX_PROGRAM_ARB - && ctx->Extensions.ARB_vertex_program) { - if (index >= ctx->Const.VertexProgram.MaxLocalParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramLocalParameterARB"); - return; - } - prog = &(ctx->VertexProgram.Current->Base); - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glProgramLocalParameterARB"); - return; - } - - ASSERT(index < MAX_PROGRAM_LOCAL_PARAMS); - prog->LocalParams[index][0] = x; - prog->LocalParams[index][1] = y; - prog->LocalParams[index][2] = z; - prog->LocalParams[index][3] = w; } @@ -647,41 +662,14 @@ void GLAPIENTRY _mesa_GetProgramLocalParameterfvARB(GLenum target, GLuint index, GLfloat *params) { - const struct gl_program *prog; - GLuint maxParams; + GLfloat *param; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - if (target == GL_VERTEX_PROGRAM_ARB - && ctx->Extensions.ARB_vertex_program) { - prog = &(ctx->VertexProgram.Current->Base); - maxParams = ctx->Const.VertexProgram.MaxLocalParams; + if (get_local_param_pointer(ctx, "glProgramLocalParameters4fvEXT", + target, index, &param)) { + COPY_4V(params, param); } - else if (target == GL_FRAGMENT_PROGRAM_ARB - && ctx->Extensions.ARB_fragment_program) { - prog = &(ctx->FragmentProgram.Current->Base); - maxParams = ctx->Const.FragmentProgram.MaxLocalParams; - } - else if (target == GL_FRAGMENT_PROGRAM_NV - && ctx->Extensions.NV_fragment_program) { - prog = &(ctx->FragmentProgram.Current->Base); - maxParams = MAX_NV_FRAGMENT_PROGRAM_PARAMS; - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, - "glGetProgramLocalParameterARB(target)"); - return; - } - - if (index >= maxParams) { - _mesa_error(ctx, 
GL_INVALID_VALUE, - "glGetProgramLocalParameterARB(index)"); - return; - } - - ASSERT(prog); - ASSERT(index < MAX_PROGRAM_LOCAL_PARAMS); - COPY_4V(params, prog->LocalParams[index]); } @@ -692,12 +680,13 @@ void GLAPIENTRY _mesa_GetProgramLocalParameterdvARB(GLenum target, GLuint index, GLdouble *params) { + GLfloat *param; GET_CURRENT_CONTEXT(ctx); - GLfloat floatParams[4]; - ASSIGN_4V(floatParams, 0.0F, 0.0F, 0.0F, 0.0F); - _mesa_GetProgramLocalParameterfvARB(target, index, floatParams); - if (ctx->ErrorValue == GL_NO_ERROR) { - COPY_4V(params, floatParams); + ASSERT_OUTSIDE_BEGIN_END(ctx); + + if (get_local_param_pointer(ctx, "glProgramLocalParameters4fvEXT", + target, index, &param)) { + COPY_4V(params, param); } } From 07e5295b6fe77253ea38dbc0d649b20e7275373a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 28 Jun 2011 16:39:15 -0700 Subject: [PATCH 021/113] mesa: Don't error on glFeedbackBuffer(size = 0, buffer = NULL) The existing error result doesn't appear in the GL 2.1 or 3.2 compatibility specs, and triggers an unexpected GL error in Intel's oglconform when it tries to reset the feedback state after usage so that the "diff the state at error time vs. context init time" code doesn't generate spurious diffs. The unexpected GL error then translates into testcase failure. Brian wants the safety check on buffer = NULL, though, so that people can't as easily set up a broken buffer. 
--- src/mesa/main/feedback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/feedback.c b/src/mesa/main/feedback.c index fcb089f1f31..597ec1e3f9a 100644 --- a/src/mesa/main/feedback.c +++ b/src/mesa/main/feedback.c @@ -64,7 +64,7 @@ _mesa_FeedbackBuffer( GLsizei size, GLenum type, GLfloat *buffer ) _mesa_error( ctx, GL_INVALID_VALUE, "glFeedbackBuffer(size<0)" ); return; } - if (!buffer) { + if (!buffer && size > 0) { _mesa_error( ctx, GL_INVALID_VALUE, "glFeedbackBuffer(buffer==NULL)" ); ctx->Feedback.BufferSize = 0; return; From 053af6ac8cda226a62844fc014ed9f133557c111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Wed, 6 Jul 2011 11:44:19 -0700 Subject: [PATCH 022/113] i915g: Fix optimization, also make it more generic. --- src/gallium/drivers/i915/i915_fpc_optimize.c | 28 ++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index 5c60d952de2..738468d8ff8 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -54,6 +54,22 @@ static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_ d1->Register.Negate == d2->Register.Negate); } +static boolean is_unswizzled(struct i915_full_src_register* r, + int sx, + int sy, + int sz, + int sw) +{ + if (sx && r->Register.SwizzleX != TGSI_SWIZZLE_X) + return FALSE; + if (sy && r->Register.SwizzleY != TGSI_SWIZZLE_Y) + return FALSE; + if (sz && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) + return FALSE; + if (sw && r->Register.SwizzleW != TGSI_SWIZZLE_W) + return FALSE; + return FALSE; +} /* * Optimize away things like: @@ -72,11 +88,15 @@ static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, unio current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && 
same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && - same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) ) + same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) && + is_unswizzled(&current->FullInstruction.Src[0], 1, 1, 1, 0) && + is_unswizzled(&current->FullInstruction.Src[1], 1, 1, 1, 0) && + is_unswizzled(&next->FullInstruction.Src[0], 0, 0, 0, 1) ) { next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_ONE; + current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; return; } @@ -87,11 +107,15 @@ static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, unio current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && - same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) ) + same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) && + is_unswizzled(&current->FullInstruction.Src[0], 1, 1, 1, 0) && + is_unswizzled(&current->FullInstruction.Src[1], 1, 1, 1, 0) && + is_unswizzled(&next->FullInstruction.Src[0], 0, 0, 0, 1) ) { next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_ONE; + current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W; return; } } From 88349255ce19b894e98fdf63a47c8219e601798f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 6 Jul 2011 12:15:10 -0600 Subject: [PATCH 023/113] softpipe: minor clean-ups in sp_quad_depth_test.c --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 
deletions(-) diff --git 
a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 89b2a91fc1f..3127d22d8f4 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -189,7 +189,8 @@ convert_quad_depth( struct depth_data *data, /** - * Compute the depth_data::shader_stencil_refs[] values from the float fragment stencil values. + * Compute the depth_data::shader_stencil_refs[] values from the float + * fragment stencil values. */ static void convert_quad_stencil( struct depth_data *data, @@ -205,10 +206,8 @@ convert_quad_stencil( struct depth_data *data, case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - { - for (j = 0; j < QUAD_SIZE; j++) { - data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); - } + for (j = 0; j < QUAD_SIZE; j++) { + data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); } break; default: @@ -216,6 +215,7 @@ convert_quad_stencil( struct depth_data *data, } } + /** * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer. 
*/ From 2ea7b374f6111a3aab2589d915dd3fcd309a59ca Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 6 Jul 2011 15:02:25 -0600 Subject: [PATCH 024/113] tgsi: add some debug/print code in exec_tex(), disabled --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 9cf74a838fe..712e8aca794 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1839,6 +1839,17 @@ exec_tex(struct tgsi_exec_machine *mach, assert(0); } +#if 0 + debug_printf("fetch r: %g %g %g %g\n", + r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); + debug_printf("fetch g: %g %g %g %g\n", + r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); + debug_printf("fetch b: %g %g %g %g\n", + r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); + debug_printf("fetch a: %g %g %g %g\n", + r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); +#endif + for (chan = 0; chan < NUM_CHANNELS; chan++) { if (inst->Dst[0].Register.WriteMask & (1 << chan)) { store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); From 057a107d4433eefce0ac99810a6e182f19fa64a6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 6 Jul 2011 15:02:58 -0600 Subject: [PATCH 025/113] softpipe: add missing stencil format case in convert_quad_stencil() Part of the fix for https://bugs.freedesktop.org/show_bug.cgi?id=38729 NOTE: This is a candidate for the 7.11 branch --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 3127d22d8f4..15f3a8fd813 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -206,6 +206,7 @@ convert_quad_stencil( struct depth_data *data, case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_X8Z24_UNORM: case 
PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_S8_USCALED: for (j = 0; j < QUAD_SIZE; j++) { data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); } From b786db06540472beda9cedd18937d6e12855b3eb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 6 Jul 2011 15:04:17 -0600 Subject: [PATCH 026/113] mesa: fix texstore addressing bugs for depth/stencil formats Using GLuint pointers worked when the pixel size was four bytes or the row stride was a multiple of four but was otherwise broken. Fixes failures found with the piglit fbo-stencil test. This helps to fix https://bugs.freedesktop.org/show_bug.cgi?id=38729 NOTE: This is a candidate for the 7.11 branch. --- src/mesa/main/texstore.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 5c925a3d314..e527981ff47 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -3303,8 +3303,7 @@ _mesa_texstore_z24_s8(TEXSTORE_PARAMS) { const GLuint depthScale = 0xffffff; const GLint srcRowStride - = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType) - / sizeof(GLuint); + = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType); GLint img, row; ASSERT(dstFormat == MESA_FORMAT_Z24_S8); @@ -3332,8 +3331,8 @@ _mesa_texstore_z24_s8(TEXSTORE_PARAMS) + dstImageOffsets[dstZoffset + img] + dstYoffset * dstRowStride / sizeof(GLuint) + dstXoffset; - const GLuint *src - = (const GLuint *) _mesa_image_address(dims, srcPacking, srcAddr, + const GLubyte *src + = (const GLubyte *) _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, img, 0, 0); @@ -3390,8 +3389,7 @@ _mesa_texstore_s8_z24(TEXSTORE_PARAMS) { const GLuint depthScale = 0xffffff; const GLint srcRowStride - = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType) - / sizeof(GLuint); + = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType); GLint img, row; ASSERT(dstFormat == 
MESA_FORMAT_S8_Z24); @@ -3406,8 +3404,8 @@ _mesa_texstore_s8_z24(TEXSTORE_PARAMS) + dstImageOffsets[dstZoffset + img] + dstYoffset * dstRowStride / sizeof(GLuint) + dstXoffset; - const GLuint *src - = (const GLuint *) _mesa_image_address(dims, srcPacking, srcAddr, + const GLubyte *src + = (const GLubyte *) _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, img, 0, 0); @@ -3479,8 +3477,7 @@ _mesa_texstore_s8(TEXSTORE_PARAMS) } else { const GLint srcRowStride - = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType) - / sizeof(GLuint); + = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType); GLint img, row; for (img = 0; img < srcDepth; img++) { @@ -3488,8 +3485,8 @@ _mesa_texstore_s8(TEXSTORE_PARAMS) + dstImageOffsets[dstZoffset + img] + dstYoffset * dstRowStride / sizeof(GLuint) + dstXoffset; - const GLuint *src - = (const GLuint *) _mesa_image_address(dims, srcPacking, srcAddr, + const GLubyte *src + = (const GLubyte *) _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, img, 0, 0); From 31484b068d4d2842593498c75ec831dfa75af14e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Wed, 6 Jul 2011 14:10:51 -0700 Subject: [PATCH 027/113] i915g: Fix optimize so that it actually gets used. 
--- src/gallium/drivers/i915/i915_fpc_optimize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index 738468d8ff8..aee9dbde357 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -68,7 +68,7 @@ static boolean is_unswizzled(struct i915_full_src_register* r, return FALSE; if (sw && r->Register.SwizzleW != TGSI_SWIZZLE_W) return FALSE; - return FALSE; + return TRUE; } /* From 0eb97979584b73907327eebc547302e6b8d8976a Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 27 Jun 2011 16:33:13 -0700 Subject: [PATCH 028/113] glsl: Track initial mask in constant propagation live set The set of values initially available (before any kills) must be tracked with each constant in the set. Otherwise the wrong component can be selected after earlier components have been killed. NOTE: This is a candidate for the 7.10 and 7.11 branches. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=37383 Reviewed-by: Eric Anholt Cc: Kenneth Graunke Cc: Matthias Bentrup --- src/glsl/opt_constant_propagation.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/glsl/opt_constant_propagation.cpp b/src/glsl/opt_constant_propagation.cpp index 4425f421193..af77e490689 100644 --- a/src/glsl/opt_constant_propagation.cpp +++ b/src/glsl/opt_constant_propagation.cpp @@ -51,11 +51,23 @@ public: this->var = var; this->write_mask = write_mask; this->constant = constant; + this->initial_values = write_mask; + } + + acp_entry(const acp_entry *src) + { + this->var = src->var; + this->write_mask = src->write_mask; + this->constant = src->constant; + this->initial_values = src->initial_values; } ir_variable *var; ir_constant *constant; unsigned write_mask; + + /** Mask of values initially available in the constant. 
*/ + unsigned initial_values; }; @@ -172,7 +184,7 @@ ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue) for (int j = 0; j < 4; j++) { if (j == channel) break; - if (found->write_mask & (1 << j)) + if (found->initial_values & (1 << j)) rhs_channel++; } @@ -285,8 +297,7 @@ ir_constant_propagation_visitor::handle_if_block(exec_list *instructions) /* Populate the initial acp with a constant of the original */ foreach_iter(exec_list_iterator, iter, *orig_acp) { acp_entry *a = (acp_entry *)iter.get(); - this->acp->push_tail(new(this->mem_ctx) acp_entry(a->var, a->write_mask, - a->constant)); + this->acp->push_tail(new(this->mem_ctx) acp_entry(a)); } visit_list_elements(this, instructions); From dbda466fc05a6262ba857a7887e16347cf3d3e96 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 24 Jun 2011 17:12:31 -0700 Subject: [PATCH 029/113] ir_to_mesa: Allocate temporary instructions on the visitor's ralloc context And don't delete them. Let ralloc clean them up. Deleting the temporary IR leaves dangling references in the prog_instruction. That results in a bad dereference when printing the IR with MESA_GLSL=dump. NOTE: This is a candidate for the 7.10 and 7.11 branches. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38584 Reviewed-by: Eric Anholt Reviewed-by: Kenneth Graunke --- src/mesa/program/ir_to_mesa.cpp | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 67adb8f3dcd..738e97ca55c 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -803,48 +803,44 @@ ir_to_mesa_visitor::visit(ir_loop *ir) ir_dereference_variable *counter = NULL; if (ir->counter != NULL) - counter = new(ir) ir_dereference_variable(ir->counter); + counter = new(mem_ctx) ir_dereference_variable(ir->counter); if (ir->from != NULL) { assert(ir->counter != NULL); - ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + ir_assignment *a = + new(mem_ctx) ir_assignment(counter, ir->from, NULL); a->accept(this); - delete a; } emit(NULL, OPCODE_BGNLOOP); if (ir->to) { ir_expression *e = - new(ir) ir_expression(ir->cmp, glsl_type::bool_type, - counter, ir->to); - ir_if *if_stmt = new(ir) ir_if(e); + new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type, + counter, ir->to); + ir_if *if_stmt = new(mem_ctx) ir_if(e); - ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + ir_loop_jump *brk = + new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break); if_stmt->then_instructions.push_tail(brk); if_stmt->accept(this); - - delete if_stmt; - delete e; - delete brk; } visit_exec_list(&ir->body_instructions, this); if (ir->increment) { ir_expression *e = - new(ir) ir_expression(ir_binop_add, counter->type, - counter, ir->increment); + new(mem_ctx) ir_expression(ir_binop_add, counter->type, + counter, ir->increment); - ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); + ir_assignment *a = + new(mem_ctx) ir_assignment(counter, e, NULL); a->accept(this); - delete a; - delete e; } emit(NULL, OPCODE_ENDLOOP); From 174cef7fee7d400fc89a3ce68b7791d2aa3eb90f Mon Sep 17 00:00:00 2001 From: Ian 
Romanick Date: Fri, 24 Jun 2011 17:30:41 -0700 Subject: [PATCH 030/113] glsl: Don't choke when printing an anonymous function parameter NOTE: This is a candidate for the 7.10 and 7.11 branches. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38584 Reviewed-by: Kenneth Graunke --- src/glsl/ir_print_visitor.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp index 5b5409daa22..518910bd129 100644 --- a/src/glsl/ir_print_visitor.cpp +++ b/src/glsl/ir_print_visitor.cpp @@ -96,6 +96,16 @@ void ir_print_visitor::indent(void) const char * ir_print_visitor::unique_name(ir_variable *var) { + /* var->name can be NULL in function prototypes when a type is given for a + * parameter but no name is given. In that case, just return an empty + * string. Don't worry about tracking the generated name in the printable + * names hash because this is the only scope where it can ever appear. + */ + if (var->name == NULL) { + static unsigned arg = 1; + return ralloc_asprintf(this->mem_ctx, "parameter@%u", arg++); + } + /* Do we already have a name for this variable? */ const char *name = (const char *) hash_table_find(this->printable_names, var); if (name != NULL) From d32d4f780f9dad122adb63086da266aec6e88850 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 27 Jun 2011 17:59:58 -0700 Subject: [PATCH 031/113] linker: Assign locations for fragment shader output Fixes an assertion failure in the piglib out-01.frag ARB_explicit_attrib_location test. The locations set via the layout qualifier in fragment shader were not being applied to the shader outputs. As a result all of these variables still had a location of -1 set. This may need some more work for pre-3.0 contexts. The problem is dealing with generic outputs that lack a layout qualifier. 
There is no way for the application to specify a location (glBindFragDataLocation is not supported) or query the location assigned by the linker (glGetFragDataLocation is not supported). NOTE: This is a candidate for the 7.10 and 7.11 branches. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38624 Reviewed-by: Eric Anholt Cc: Kenneth Graunke Cc: Vinson Lee --- src/glsl/linker.cpp | 108 ++++++++++++++++++++++++++++++-------------- 1 file changed, 75 insertions(+), 33 deletions(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index b6479e7a3a4..265da84e5a9 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1194,16 +1194,43 @@ find_available_slots(unsigned used_mask, unsigned needed_count) } +/** + * Assign locations for either VS inputs or FS outputs + * + * \param prog Shader program whose variables need locations assigned + * \param target_index Selector for the program target to receive location + * assignments. Must be either \c MESA_SHADER_VERTEX or + * \c MESA_SHADER_FRAGMENT. + * \param max_index Maximum number of generic locations. This corresponds + * to either the maximum number of draw buffers or the + * maximum number of generic attributes. + * + * \return + * If locations are successfully assigned, true is returned. Otherwise an + * error is emitted to the shader link log and false is returned. + * + * \bug + * Locations set via \c glBindFragDataLocation are not currently supported. + * Only locations assigned automatically by the linker, explicitly set by a + * layout qualifier, or explicitly set by a built-in variable (e.g., \c + * gl_FragColor) are supported for fragment shaders. + */ bool -assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index) +assign_attribute_or_color_locations(gl_shader_program *prog, + unsigned target_index, + unsigned max_index) { - /* Mark invalid attribute locations as being used. + /* Mark invalid locations as being used. 
*/ - unsigned used_locations = (max_attribute_index >= 32) - ? ~0 : ~((1 << max_attribute_index) - 1); + unsigned used_locations = (max_index >= 32) + ? ~0 : ~((1 << max_index) - 1); - gl_shader *const sh = prog->_LinkedShaders[0]; - assert(sh->Type == GL_VERTEX_SHADER); + assert((target_index == MESA_SHADER_VERTEX) + || (target_index == MESA_SHADER_FRAGMENT)); + + gl_shader *const sh = prog->_LinkedShaders[target_index]; + if (sh == NULL) + return true; /* Operate in a total of four passes. * @@ -1220,9 +1247,16 @@ assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index * 4. Assign locations to any inputs without assigned locations. */ - invalidate_variable_locations(sh, ir_var_in, VERT_ATTRIB_GENERIC0); + const int generic_base = (target_index == MESA_SHADER_VERTEX) + ? VERT_ATTRIB_GENERIC0 : FRAG_RESULT_DATA0; - if (prog->Attributes != NULL) { + const enum ir_variable_mode direction = + (target_index == MESA_SHADER_VERTEX) ? ir_var_in : ir_var_out; + + + invalidate_variable_locations(sh, direction, generic_base); + + if ((target_index == MESA_SHADER_VERTEX) && (prog->Attributes != NULL)) { for (unsigned i = 0; i < prog->Attributes->NumParameters; i++) { ir_variable *const var = sh->symbols->get_variable(prog->Attributes->Parameters[i].Name); @@ -1309,15 +1343,15 @@ assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index foreach_list(node, sh->ir) { ir_variable *const var = ((ir_instruction *) node)->as_variable(); - if ((var == NULL) || (var->mode != ir_var_in)) + if ((var == NULL) || (var->mode != direction)) continue; if (var->explicit_location) { const unsigned slots = count_attribute_slots(var->type); const unsigned use_mask = (1 << slots) - 1; - const int attr = var->location - VERT_ATTRIB_GENERIC0; + const int attr = var->location - generic_base; - if ((var->location >= (int)(max_attribute_index + VERT_ATTRIB_GENERIC0)) + if ((var->location >= (int)(max_index + generic_base)) || (var->location < 0)) { 
linker_error_printf(prog, "invalid explicit location %d specified for " @@ -1325,7 +1359,7 @@ assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index (var->location < 0) ? var->location : attr, var->name); return false; - } else if (var->location >= VERT_ATTRIB_GENERIC0) { + } else if (var->location >= generic_base) { used_locations |= (use_mask << attr); } } @@ -1349,14 +1383,16 @@ assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare); - /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS. It can only - * be explicitly assigned by via glBindAttribLocation. Mark it as reserved - * to prevent it from being automatically allocated below. - */ - find_deref_visitor find("gl_Vertex"); - find.run(sh->ir); - if (find.variable_found()) - used_locations |= (1 << 0); + if (target_index == MESA_SHADER_VERTEX) { + /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS. It can + * only be explicitly assigned by via glBindAttribLocation. Mark it as + * reserved to prevent it from being automatically allocated below. + */ + find_deref_visitor find("gl_Vertex"); + find.run(sh->ir); + if (find.variable_found()) + used_locations |= (1 << 0); + } for (unsigned i = 0; i < num_attr; i++) { /* Mask representing the contiguous slots that will be used by this @@ -1367,14 +1403,17 @@ assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index int location = find_available_slots(used_locations, to_assign[i].slots); if (location < 0) { + const char *const string = (target_index == MESA_SHADER_VERTEX) + ? 
"vertex shader input" : "fragment shader output"; + linker_error_printf(prog, "insufficient contiguous attribute locations " - "available for vertex shader input `%s'", - to_assign[i].var->name); + "available for %s `%s'", + string, to_assign[i].var->name); return false; } - to_assign[i].var->location = VERT_ATTRIB_GENERIC0 + location; + to_assign[i].var->location = generic_base + location; used_locations |= (use_mask << location); } @@ -1671,16 +1710,19 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) assign_uniform_locations(prog); - if (prog->_LinkedShaders[MESA_SHADER_VERTEX] != NULL) { - /* FINISHME: The value of the max_attribute_index parameter is - * FINISHME: implementation dependent based on the value of - * FINISHME: GL_MAX_VERTEX_ATTRIBS. GL_MAX_VERTEX_ATTRIBS must be - * FINISHME: at least 16, so hardcode 16 for now. - */ - if (!assign_attribute_locations(prog, 16)) { - prog->LinkStatus = false; - goto done; - } + /* FINISHME: The value of the max_attribute_index parameter is + * FINISHME: implementation dependent based on the value of + * FINISHME: GL_MAX_VERTEX_ATTRIBS. GL_MAX_VERTEX_ATTRIBS must be + * FINISHME: at least 16, so hardcode 16 for now. 
+ */ + if (!assign_attribute_or_color_locations(prog, MESA_SHADER_VERTEX, 16)) { + prog->LinkStatus = false; + goto done; + } + + if (!assign_attribute_or_color_locations(prog, MESA_SHADER_FRAGMENT, ctx->Const.MaxDrawBuffers)) { + prog->LinkStatus = false; + goto done; } unsigned prev; From 7a10976adb65010bec7952a80d1b43d62b3f8bb3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 6 Jul 2011 17:18:32 -0600 Subject: [PATCH 032/113] i915g: move code after declaration --- src/gallium/drivers/i915/i915_fpc_optimize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index aee9dbde357..406945c6798 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -167,10 +167,11 @@ static void copy_token(union i915_full_token* o, union tgsi_full_token* i) struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) { struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); - out_tokens->NumTokens = 0; struct tgsi_parse_context parse; int i = 0; + out_tokens->NumTokens = 0; + /* Count the tokens */ tgsi_parse_init( &parse, tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { From 568d99cc6c8eea75ce50fe29e1ea8a94fe7ff7a7 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 6 Jul 2011 21:58:33 +0200 Subject: [PATCH 033/113] st/xa: Fix render to xa_format_a8, which is backed by a gallium L8 texture Signed-off-by: Thomas Hellstrom --- src/gallium/state_trackers/xa/xa_composite.c | 72 ++++++++++++-------- src/gallium/state_trackers/xa/xa_context.c | 9 ++- src/gallium/state_trackers/xa/xa_priv.h | 12 ++++ src/gallium/state_trackers/xa/xa_renderer.c | 8 ++- src/gallium/state_trackers/xa/xa_tgsi.c | 22 ++++-- 5 files changed, 86 insertions(+), 37 deletions(-) diff --git a/src/gallium/state_trackers/xa/xa_composite.c b/src/gallium/state_trackers/xa/xa_composite.c index 
5389af6f363..cc294846e10 100644 --- a/src/gallium/state_trackers/xa/xa_composite.c +++ b/src/gallium/state_trackers/xa/xa_composite.c @@ -79,6 +79,25 @@ static const struct xa_composite_blend xa_blends[] = { }; +/* + * The alpha value stored in a luminance texture is read by the + * hardware as color. + */ +static unsigned +xa_convert_blend_for_luminance(unsigned factor) +{ + switch(factor) { + case PIPE_BLENDFACTOR_DST_ALPHA: + return PIPE_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return PIPE_BLENDFACTOR_INV_DST_COLOR; + default: + break; + } + return factor; +} + + static boolean blend_for_op(struct xa_composite_blend *blend, enum xa_composite_op op, @@ -104,15 +123,20 @@ blend_for_op(struct xa_composite_blend *blend, } } + if (!dst_pic->srf) + return supported; + + if (dst_pic->srf->tex->format == PIPE_FORMAT_L8_UNORM) { + blend->rgb_src = xa_convert_blend_for_luminance(blend->rgb_src); + blend->rgb_dst = xa_convert_blend_for_luminance(blend->rgb_dst); + } /* * If there's no dst alpha channel, adjust the blend op so that we'll treat * it as always 1. 
*/ - if (dst_pic && - xa_format_a(dst_pic->pict_format) == 0 && - blend->alpha_dst) { + if (xa_format_a(dst_pic->pict_format) == 0 && blend->alpha_dst) { if (blend->rgb_src == PIPE_BLENDFACTOR_DST_ALPHA) blend->rgb_src = PIPE_BLENDFACTOR_ONE; else if (blend->rgb_src == PIPE_BLENDFACTOR_INV_DST_ALPHA) @@ -237,7 +261,6 @@ bind_composite_blend_state(struct xa_context *ctx, static unsigned int picture_format_fixups(struct xa_picture *src_pic, - struct xa_picture *dst_pic, int mask) { boolean set_alpha = FALSE; @@ -253,22 +276,17 @@ picture_format_fixups(struct xa_picture *src_pic, src_hw_format = xa_surface_format(src); src_pic_format = src_pic->pict_format; - if (!src || src_hw_format == src_pic_format) { - if (src_pic_format == xa_format_a8) { - if (mask) - return FS_MASK_LUMINANCE; - else if (dst_pic->pict_format != xa_format_a8) { + set_alpha = (xa_format_type_is_color(src_pic_format) && + xa_format_a(src_pic_format) == 0); - /* - * if both dst and src are luminance then - * we don't want to swizzle the alpha (X) of the - * source into W component of the dst because - * it will break our destination - */ - return FS_SRC_LUMINANCE; - } - } - return 0; + if (set_alpha) + ret |= mask ? FS_MASK_SET_ALPHA : FS_SRC_SET_ALPHA; + + if (src_hw_format == src_pic_format) { + if (src->tex->format == PIPE_FORMAT_L8_UNORM) + return ((mask) ? FS_MASK_LUMINANCE : FS_SRC_LUMINANCE); + + return ret; } src_hw_type = xa_format_type(src_hw_format); @@ -280,13 +298,8 @@ picture_format_fixups(struct xa_picture *src_pic, src_pic_type == xa_type_argb))); if (!swizzle && (src_hw_type != src_pic_type)) - return 0; + return ret; - set_alpha = (xa_format_type_is_color(src_pic_format) && - xa_format_a(src_pic_type) == 0); - - if (set_alpha) - ret |= mask ? FS_MASK_SET_ALPHA : FS_SRC_SET_ALPHA; if (swizzle) ret |= mask ? 
FS_MASK_SWIZZLE_RGB : FS_SRC_SWIZZLE_RGB; @@ -300,7 +313,6 @@ bind_shaders(struct xa_context *ctx, const struct xa_composite *comp) struct xa_shader shader; struct xa_picture *src_pic = comp->src; struct xa_picture *mask_pic = comp->mask; - struct xa_picture *dst_pic = comp->dst; ctx->has_solid_color = FALSE; @@ -321,7 +333,7 @@ bind_shaders(struct xa_context *ctx, const struct xa_composite *comp) vs_traits |= VS_COMPOSITE; } - fs_traits |= picture_format_fixups(src_pic, dst_pic, 0); + fs_traits |= picture_format_fixups(src_pic, 0); } if (mask_pic) { @@ -340,9 +352,12 @@ bind_shaders(struct xa_context *ctx, const struct xa_composite *comp) fs_traits |= FS_CA_FULL; } - fs_traits |= picture_format_fixups(mask_pic, dst_pic, 1); + fs_traits |= picture_format_fixups(mask_pic, 1); } + if (ctx->dst->srf->format == PIPE_FORMAT_L8_UNORM) + fs_traits |= FS_DST_LUMINANCE; + shader = xa_shaders_get(ctx->shaders, vs_traits, fs_traits); cso_set_vertex_shader_handle(ctx->cso, shader.vs); cso_set_fragment_shader_handle(ctx->cso, shader.fs); @@ -433,6 +448,7 @@ xa_composite_prepare(struct xa_context *ctx, if (ret != XA_ERR_NONE) return ret; + ctx->dst = dst_srf; renderer_bind_destination(ctx, dst_srf->srf, dst_srf->srf->width, dst_srf->srf->height); diff --git a/src/gallium/state_trackers/xa/xa_context.c b/src/gallium/state_trackers/xa/xa_context.c index 3cc25ed2071..118a390a14a 100644 --- a/src/gallium/state_trackers/xa/xa_context.c +++ b/src/gallium/state_trackers/xa/xa_context.c @@ -278,13 +278,16 @@ xa_solid_prepare(struct xa_context *ctx, struct xa_surface *dst, int width, height; int ret; - xa_pixel_to_float4(fg, ctx->solid_color); - ctx->has_solid_color = 1; - ret = xa_surface_psurf_create(ctx, dst); if (ret != XA_ERR_NONE) return ret; + if (dst->srf->format == PIPE_FORMAT_L8_UNORM) + xa_pixel_to_float4_a8(fg, ctx->solid_color); + else + xa_pixel_to_float4(fg, ctx->solid_color); + ctx->has_solid_color = 1; + ctx->dst = dst; width = dst->srf->width; height = dst->srf->height; 
diff --git a/src/gallium/state_trackers/xa/xa_priv.h b/src/gallium/state_trackers/xa/xa_priv.h index 94627e1e9d0..e8f67a12276 100644 --- a/src/gallium/state_trackers/xa/xa_priv.h +++ b/src/gallium/state_trackers/xa/xa_priv.h @@ -135,6 +135,7 @@ enum xa_fs_traits { FS_MASK_SET_ALPHA = 1 << 13, FS_SRC_LUMINANCE = 1 << 14, FS_MASK_LUMINANCE = 1 << 15, + FS_DST_LUMINANCE = 1 << 16, FS_FILL = (FS_SOLID_FILL | FS_LINGRAD_FILL | FS_RADGRAD_FILL), FS_COMPONENT_ALPHA = (FS_CA_FULL | FS_CA_SRCALPHA) @@ -172,6 +173,17 @@ xa_pixel_to_float4(uint32_t pixel, float *color) color[3] = ((float)a) / 255.; } +static INLINE void +xa_pixel_to_float4_a8(uint32_t pixel, float *color) +{ + uint32_t a; + + a = (pixel >> 24) & 0xff; + color[0] = ((float)a) / 255.; + color[1] = ((float)a) / 255.; + color[2] = ((float)a) / 255.; + color[3] = ((float)a) / 255.; +} /* * xa_tgsi.c diff --git a/src/gallium/state_trackers/xa/xa_renderer.c b/src/gallium/state_trackers/xa/xa_renderer.c index 559b2699da6..ef762f0ab49 100644 --- a/src/gallium/state_trackers/xa/xa_renderer.c +++ b/src/gallium/state_trackers/xa/xa_renderer.c @@ -418,6 +418,7 @@ renderer_copy_prepare(struct xa_context *r, struct pipe_context *pipe = r->pipe; struct pipe_screen *screen = pipe->screen; struct xa_shader shader; + uint32_t fs_traits = FS_COMPOSITE; assert(screen->is_format_supported(screen, dst_surface->format, PIPE_TEXTURE_2D, 0, @@ -469,7 +470,12 @@ renderer_copy_prepare(struct xa_context *r, } /* shaders */ - shader = xa_shaders_get(r->shaders, VS_COMPOSITE, FS_COMPOSITE); + if (src_texture->format == PIPE_FORMAT_L8_UNORM) + fs_traits |= FS_SRC_LUMINANCE; + if (dst_surface->format == PIPE_FORMAT_L8_UNORM) + fs_traits |= FS_DST_LUMINANCE; + + shader = xa_shaders_get(r->shaders, VS_COMPOSITE, fs_traits); cso_set_vertex_shader_handle(r->cso, shader.vs); cso_set_fragment_shader_handle(r->cso, shader.fs); diff --git a/src/gallium/state_trackers/xa/xa_tgsi.c b/src/gallium/state_trackers/xa/xa_tgsi.c index 
fb6ffefd636..ed1690ed369 100644 --- a/src/gallium/state_trackers/xa/xa_tgsi.c +++ b/src/gallium/state_trackers/xa/xa_tgsi.c @@ -85,6 +85,7 @@ print_fs_traits(int fs_traits) "FS_MASK_SET_ALPHA", /* = 1 << 13, */ "FS_SRC_LUMINANCE", /* = 1 << 14, */ "FS_MASK_LUMINANCE", /* = 1 << 15, */ + "FS_DST_LUMINANCE", /* = 1 << 16, */ }; int i, k; @@ -454,6 +455,7 @@ create_fs(struct pipe_context *pipe, unsigned fs_traits) unsigned mask_set_alpha = (fs_traits & FS_MASK_SET_ALPHA) != 0; unsigned src_luminance = (fs_traits & FS_SRC_LUMINANCE) != 0; unsigned mask_luminance = (fs_traits & FS_MASK_LUMINANCE) != 0; + unsigned dst_luminance = (fs_traits & FS_DST_LUMINANCE) != 0; #if 0 print_fs_traits(fs_traits); @@ -508,7 +510,7 @@ create_fs(struct pipe_context *pipe, unsigned fs_traits) #endif if (is_composite) { - if (has_mask || src_luminance) + if (has_mask || src_luminance || dst_luminance) src = ureg_DECL_temporary(ureg); else src = out; @@ -516,14 +518,14 @@ create_fs(struct pipe_context *pipe, unsigned fs_traits) src_repeat_none, src_swizzle, src_set_alpha); } else if (is_fill) { if (is_solid) { - if (has_mask || src_luminance) + if (has_mask || src_luminance || dst_luminance) src = ureg_dst(src_input); else ureg_MOV(ureg, out, src_input); } else if (is_lingrad || is_radgrad) { struct ureg_src coords, const0124, matrow0, matrow1, matrow2; - if (has_mask || src_luminance) + if (has_mask || src_luminance || dst_luminance) src = ureg_DECL_temporary(ureg); else src = out; @@ -550,7 +552,7 @@ create_fs(struct pipe_context *pipe, unsigned fs_traits) ureg_MOV(ureg, src, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_X)); ureg_MOV(ureg, ureg_writemask(src, TGSI_WRITEMASK_XYZ), ureg_scalar(imm0, TGSI_SWIZZLE_X)); - if (!has_mask) + if (!has_mask && !dst_luminance) ureg_MOV(ureg, out, ureg_src(src)); } @@ -559,11 +561,21 @@ create_fs(struct pipe_context *pipe, unsigned fs_traits) xrender_tex(ureg, mask, mask_pos, mask_sampler, imm0, mask_repeat_none, mask_swizzle, mask_set_alpha); /* src IN 
mask */ - src_in_mask(ureg, out, ureg_src(src), ureg_src(mask), + + src_in_mask(ureg, (dst_luminance) ? src : out, ureg_src(src), + ureg_src(mask), comp_alpha_mask, mask_luminance); + ureg_release_temporary(ureg, mask); } + if (dst_luminance) { + /* + * Make sure the alpha channel goes into the output L8 surface. + */ + ureg_MOV(ureg, out, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_W)); + } + ureg_END(ureg); return ureg_create_shader_and_destroy(ureg, pipe); From 1b23d4c9a5b9567ba62aee753d2a6ad6d5d2d177 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 7 Jul 2011 10:18:24 +0200 Subject: [PATCH 034/113] st/xa: Fix up error reporting. Disable component alpha. Component alpha is temporarily disabled since it seems a bit buggy. Signed-off-by: Thomas Hellstrom --- src/gallium/state_trackers/xa/xa_composite.c | 37 ++++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/src/gallium/state_trackers/xa/xa_composite.c b/src/gallium/state_trackers/xa/xa_composite.c index cc294846e10..347fe0cd379 100644 --- a/src/gallium/state_trackers/xa/xa_composite.c +++ b/src/gallium/state_trackers/xa/xa_composite.c @@ -110,10 +110,16 @@ blend_for_op(struct xa_composite_blend *blend, int i; boolean supported = FALSE; + /* + * Temporarily disable component alpha since it appears buggy. 
+ */ + if (src_pic->component_alpha || + (mask_pic && mask_pic->component_alpha)) + return FALSE; + /* * our default in case something goes wrong */ - *blend = xa_blends[XA_BLEND_OP_OVER]; for (i = 0; i < num_blends; ++i) { @@ -215,13 +221,13 @@ xa_composite_check_accelerated(const struct xa_composite *comp) if (!xa_is_filter_accelerated(src_pic) || !xa_is_filter_accelerated(comp->mask)) { - return XA_ERR_INVAL; + return -XA_ERR_INVAL; } if (src_pic->src_pict) { if (src_pic->src_pict->type != xa_src_pict_solid_fill) - return XA_ERR_INVAL; + return -XA_ERR_INVAL; } if (blend_for_op(&blend, comp->op, comp->src, comp->mask, comp->dst)) { @@ -229,23 +235,24 @@ xa_composite_check_accelerated(const struct xa_composite *comp) if (mask && mask->component_alpha && xa_format_rgb(mask->pict_format)) { if (blend.alpha_src && blend.rgb_src != PIPE_BLENDFACTOR_ZERO) { - return XA_ERR_INVAL; + return -XA_ERR_INVAL; } } return XA_ERR_NONE; } - return XA_ERR_INVAL; + return -XA_ERR_INVAL; } -static void +static int bind_composite_blend_state(struct xa_context *ctx, const struct xa_composite *comp) { struct xa_composite_blend blend_opt; struct pipe_blend_state blend; - blend_for_op(&blend_opt, comp->op, comp->src, comp->mask, comp->dst); + if (!blend_for_op(&blend_opt, comp->op, comp->src, comp->mask, comp->dst)) + return -XA_ERR_INVAL; memset(&blend, 0, sizeof(struct pipe_blend_state)); blend.rt[0].blend_enable = 1; @@ -257,6 +264,7 @@ bind_composite_blend_state(struct xa_context *ctx, blend.rt[0].alpha_dst_factor = blend_opt.rgb_dst; cso_set_blend(ctx->cso, &blend); + return XA_ERR_NONE; } static unsigned int @@ -306,7 +314,7 @@ picture_format_fixups(struct xa_picture *src_pic, return ret; } -static void +static int bind_shaders(struct xa_context *ctx, const struct xa_composite *comp) { unsigned vs_traits = 0, fs_traits = 0; @@ -345,7 +353,9 @@ bind_shaders(struct xa_context *ctx, const struct xa_composite *comp) if (mask_pic->component_alpha) { struct xa_composite_blend blend; - 
blend_for_op(&blend, comp->op, src_pic, mask_pic, NULL); + if (!blend_for_op(&blend, comp->op, src_pic, mask_pic, NULL)) + return -XA_ERR_INVAL; + if (blend.alpha_src) { fs_traits |= FS_CA_SRCALPHA; } else @@ -361,6 +371,7 @@ bind_shaders(struct xa_context *ctx, const struct xa_composite *comp) shader = xa_shaders_get(ctx->shaders, vs_traits, fs_traits); cso_set_vertex_shader_handle(ctx->cso, shader.vs); cso_set_fragment_shader_handle(ctx->cso, shader.fs); + return XA_ERR_NONE; } static void @@ -453,8 +464,12 @@ xa_composite_prepare(struct xa_context *ctx, dst_srf->srf->width, dst_srf->srf->height); - bind_composite_blend_state(ctx, comp); - bind_shaders(ctx, comp); + ret = bind_composite_blend_state(ctx, comp); + if (ret != XA_ERR_NONE) + return ret; + ret = bind_shaders(ctx, comp); + if (ret != XA_ERR_NONE) + return ret; bind_samplers(ctx, comp); if (ctx->num_bound_samplers == 0 ) { /* solid fill */ From f934c80faf0d1fb559cee0a903daba321098320d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Thu, 7 Jul 2011 00:15:05 -0700 Subject: [PATCH 035/113] i915g: Apply optimization to ADDS/MUL and only if we're not saturating. 
--- src/gallium/drivers/i915/i915_fpc_optimize.c | 33 ++++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index 406945c6798..e60c27e5473 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -71,6 +71,24 @@ static boolean is_unswizzled(struct i915_full_src_register* r, return TRUE; } +static boolean op_commutes(unsigned opcode) +{ + if (opcode == TGSI_OPCODE_ADD) return TRUE; + if (opcode == TGSI_OPCODE_MUL) return TRUE; + return FALSE; +} + +static unsigned op_neutral_element(unsigned opcode) +{ + if (opcode == TGSI_OPCODE_ADD) + return TGSI_SWIZZLE_ZERO; + if (opcode == TGSI_OPCODE_MUL) + return TGSI_SWIZZLE_ONE; + + debug_printf("Unknown opcode %d\n",opcode); + return TGSI_SWIZZLE_ZERO; +} + /* * Optimize away things like: * MUL OUT[0].xyz, TEMP[1], TEMP[2] @@ -78,12 +96,14 @@ static boolean is_unswizzled(struct i915_full_src_register* r, * into: * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] * This is useful for optimizing texenv. 
+ * XXX also handle swizzles other than XYZ/W */ -static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, union i915_full_token* next) +static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next) { if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && - current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL && + op_commutes(current->FullInstruction.Instruction.Opcode) && + current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && @@ -95,14 +115,15 @@ static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, unio { next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; - current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_ONE; + current->FullInstruction.Src[0].Register.SwizzleW = op_neutral_element(current->FullInstruction.Instruction.Opcode); current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; return; } if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && - current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL && + op_commutes(current->FullInstruction.Instruction.Opcode) && + current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && @@ -114,7 +135,7 @@ static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, unio { 
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; - current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_ONE; + current->FullInstruction.Src[1].Register.SwizzleW = op_neutral_element(current->FullInstruction.Instruction.Opcode); current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W; return; } @@ -189,7 +210,7 @@ struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) copy_token(&out_tokens->Tokens[i] , &parse.FullToken); if (i > 0) - i915_fpc_optimize_mov_after_mul(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); + i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); i++; } From e54354e8853c3521e71e5b94984722f18b7638e4 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 28 Jun 2011 16:06:56 +0200 Subject: [PATCH 036/113] nv50: more formats for nv50_resource_copy_region conversion path It's not supposed to do conversion, but st sometimes asks us to. Sometimes conversion is even wrong (e.g. between UNORM and SRGB). This should now include all formats the 2D engine supports. 
--- src/gallium/drivers/nv50/nv50_surface.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 3d7e880ccce..fb51db84ac6 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -34,25 +34,16 @@ #include "nv50_defs.xml.h" +#define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL + /* return TRUE for formats that can be converted among each other by NV50_2D */ static INLINE boolean nv50_2d_format_faithful(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8X8_SRGB: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - return TRUE; - default: - return FALSE; - } + uint8_t id = nv50_format_table[format].rt; + + return (id >= 0xc0) && + (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); } static INLINE uint8_t @@ -63,7 +54,7 @@ nv50_2d_format(enum pipe_format format) /* Hardware values for color formats range from 0xc0 to 0xff, * but the 2D engine doesn't support all of them. */ - if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) + if ((id >= 0xc0) && (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)))) return id; switch (util_format_get_blocksize(format)) { From 3069a7eaa5d83e7f41616347ba244c0dc0d944ae Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 7 Jul 2011 14:58:29 +0200 Subject: [PATCH 037/113] nv50,nvc0: use screen instead of context for flush notifier Context may become NULL and we still have to be able to flush pending fences. 
--- src/gallium/drivers/nv50/nv50_context.c | 13 +++++-------- src/gallium/drivers/nv50/nv50_screen.c | 2 ++ src/gallium/drivers/nv50/nv50_state_validate.c | 3 +-- src/gallium/drivers/nv50/nv50_vbo.c | 7 +++---- src/gallium/drivers/nvc0/nvc0_context.c | 13 +++++-------- src/gallium/drivers/nvc0/nvc0_screen.c | 8 ++++++-- src/gallium/drivers/nvc0/nvc0_state_validate.c | 3 +-- src/gallium/drivers/nvc0/nvc0_vbo.c | 7 +++---- 8 files changed, 26 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index ceb83f6e684..ac3e361a446 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -60,13 +60,13 @@ nv50_texture_barrier(struct pipe_context *pipe) void nv50_default_flush_notify(struct nouveau_channel *chan) { - struct nv50_context *nv50 = chan->user_private; + struct nv50_screen *screen = chan->user_private; - if (!nv50) + if (!screen) return; - nouveau_fence_update(&nv50->screen->base, TRUE); - nouveau_fence_next(&nv50->screen->base); + nouveau_fence_update(&screen->base, TRUE); + nouveau_fence_next(&screen->base); } static void @@ -100,10 +100,8 @@ nv50_destroy(struct pipe_context *pipe) draw_destroy(nv50->draw); - if (nv50->screen->cur_ctx == nv50) { - nv50->screen->base.channel->user_private = NULL; + if (nv50->screen->cur_ctx == nv50) nv50->screen->cur_ctx = NULL; - } FREE(nv50); } @@ -140,7 +138,6 @@ nv50_create(struct pipe_screen *pscreen, void *priv) if (!screen->cur_ctx) screen->cur_ctx = nv50; - screen->base.channel->user_private = nv50; screen->base.channel->flush_notify = nv50_default_flush_notify; nv50_init_query_functions(nv50); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index cc921d08666..4cda303c44a 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -215,6 +215,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) 
nouveau_fence_wait(screen->base.fence.current); nouveau_fence_ref (NULL, &screen->base.fence.current); } + screen->base.channel->user_private = NULL; nouveau_bo_ref(NULL, &screen->code); nouveau_bo_ref(NULL, &screen->tls_bo); @@ -300,6 +301,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) FAIL_SCREEN_INIT("nouveau_screen_init failed: %d\n", ret); chan = screen->base.channel; + chan->user_private = screen; pscreen->winsys = ws; pscreen->destroy = nv50_screen_destroy; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 11561f5a8e6..d29c1e9723f 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -282,8 +282,7 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to) if (!ctx_to->zsa) ctx_to->dirty &= ~NV50_NEW_ZSA; - ctx_to->screen->base.channel->user_private = ctx_to->screen->cur_ctx = - ctx_to; + ctx_to->screen->cur_ctx = ctx_to; } static struct state_validate { diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index bb08941c243..f23008ae4cf 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -389,11 +389,11 @@ nv50_prim_gl(unsigned prim) static void nv50_draw_vbo_flush_notify(struct nouveau_channel *chan) { - struct nv50_context *nv50 = chan->user_private; + struct nv50_screen *screen = chan->user_private; - nouveau_fence_update(&nv50->screen->base, TRUE); + nouveau_fence_update(&screen->base, TRUE); - nv50_bufctx_emit_relocs(nv50); + nv50_bufctx_emit_relocs(screen->cur_ctx); } static void @@ -650,7 +650,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv50_state_validate(nv50); chan->flush_notify = nv50_draw_vbo_flush_notify; - chan->user_private = nv50; if (nv50->vbo_fifo) { nv50_push_vbo(nv50, info); diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 
2679b7f86aa..983db23eedb 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -89,10 +89,8 @@ nvc0_destroy(struct pipe_context *pipe) draw_destroy(nvc0->draw); - if (nvc0->screen->cur_ctx == nvc0) { - nvc0->screen->base.channel->user_private = NULL; + if (nvc0->screen->cur_ctx == nvc0) nvc0->screen->cur_ctx = NULL; - } FREE(nvc0); } @@ -100,13 +98,13 @@ nvc0_destroy(struct pipe_context *pipe) void nvc0_default_flush_notify(struct nouveau_channel *chan) { - struct nvc0_context *nvc0 = chan->user_private; + struct nvc0_screen *screen = chan->user_private; - if (!nvc0) + if (!screen) return; - nouveau_fence_update(&nvc0->screen->base, TRUE); - nouveau_fence_next(&nvc0->screen->base); + nouveau_fence_update(&screen->base, TRUE); + nouveau_fence_next(&screen->base); } struct pipe_context * @@ -141,7 +139,6 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) if (!screen->cur_ctx) screen->cur_ctx = nvc0; - screen->base.channel->user_private = nvc0; screen->base.channel->flush_notify = nvc0_default_flush_notify; nvc0_init_query_functions(nvc0); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 34bf0f0a2ad..1bd7fa9f0ea 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -198,8 +198,11 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) { struct nvc0_screen *screen = nvc0_screen(pscreen); - nouveau_fence_wait(screen->base.fence.current); - nouveau_fence_ref(NULL, &screen->base.fence.current); + if (screen->base.fence.current) { + nouveau_fence_wait(screen->base.fence.current); + nouveau_fence_ref(NULL, &screen->base.fence.current); + } + screen->base.channel->user_private = NULL; nouveau_bo_ref(NULL, &screen->text); nouveau_bo_ref(NULL, &screen->tls); @@ -358,6 +361,7 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } chan = screen->base.channel; + chan->user_private = screen; 
pscreen->winsys = ws; pscreen->destroy = nvc0_screen_destroy; diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 9b2a28150b1..f300f37fb7b 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -428,8 +428,7 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to) if (!ctx_to->zsa) ctx_to->dirty &= ~NVC0_NEW_ZSA; - ctx_to->screen->base.channel->user_private = ctx_to->screen->cur_ctx = - ctx_to; + ctx_to->screen->cur_ctx = ctx_to; } static struct state_validate { diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 41079104b39..8a5bf8dc582 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -367,11 +367,11 @@ nvc0_prim_gl(unsigned prim) static void nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) { - struct nvc0_context *nvc0 = chan->user_private; + struct nvc0_screen *screen = chan->user_private; - nouveau_fence_update(&nvc0->screen->base, TRUE); + nouveau_fence_update(&screen->base, TRUE); - nvc0_bufctx_emit_relocs(nvc0); + nvc0_bufctx_emit_relocs(screen->cur_ctx); } static void @@ -587,7 +587,6 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0_state_validate(nvc0); chan->flush_notify = nvc0_draw_vbo_flush_notify; - chan->user_private = nvc0; if (nvc0->vbo_fifo) { nvc0_push_vbo(nvc0, info); From 2e35d90fb9a50562d3c658d45a50e16623028d8e Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 4 Jul 2011 00:57:42 +0200 Subject: [PATCH 038/113] mesa: Check the texture against all units in unbind_texobj_from_texunits(). NOTE: This is a candidate for the 7.11 branch. 
Signed-off-by: Henri Verbeet Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/texobj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index 565a3a2d8df..0e84b874411 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -899,7 +899,7 @@ unbind_texobj_from_texunits(struct gl_context *ctx, { GLuint u, tex; - for (u = 0; u < MAX_TEXTURE_IMAGE_UNITS; u++) { + for (u = 0; u < Elements(ctx->Texture.Unit); u++) { struct gl_texture_unit *unit = &ctx->Texture.Unit[u]; for (tex = 0; tex < NUM_TEXTURE_TARGETS; tex++) { if (texObj == unit->CurrentTex[tex]) { From 86adc2b29effb573c18eb0de7016cef605ab1edc Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 4 Jul 2011 00:57:42 +0200 Subject: [PATCH 039/113] mesa: Allow sampling from units >= MAX_TEXTURE_UNITS in shaders. The total number of units used by a shader is limited to MAX_TEXTURE_UNITS, but the actual indices are only limited by MAX_COMBINED_TEXTURE_IMAGE_UNITS, since they're shared between vertex and fragment shaders. NOTE: This is a candidate for the 7.11 branch. 
Signed-off-by: Henri Verbeet Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/mtypes.h | 2 +- src/mesa/main/shaderapi.c | 2 +- src/mesa/main/uniforms.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index f018c75cc6a..b88118366b2 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1856,7 +1856,7 @@ struct gl_program GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */ GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */ GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */ - GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */ + GLbitfield TexturesUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; /**< TEXTURE_x_BIT bitmask */ GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */ GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */ diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index b58e30de9c4..cb02e430c78 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1032,7 +1032,7 @@ validate_samplers(const struct gl_program *prog, char *errMsg) "TEXTURE_2D", "TEXTURE_1D", }; - GLint targetUsed[MAX_TEXTURE_IMAGE_UNITS]; + GLint targetUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; GLbitfield samplersUsed = prog->SamplersUsed; GLuint i; diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 1c4fd82baac..dd069a3a4d1 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -580,7 +580,7 @@ _mesa_update_shader_textures_used(struct gl_program *prog) if (prog->SamplersUsed & (1 << s)) { GLuint unit = prog->SamplerUnits[s]; GLuint tgt = prog->SamplerTargets[s]; - assert(unit < MAX_TEXTURE_IMAGE_UNITS); + assert(unit < Elements(prog->TexturesUsed)); assert(tgt < NUM_TEXTURE_TARGETS); prog->TexturesUsed[unit] |= (1 << tgt); } @@ -674,7 +674,7 @@ set_program_uniform(struct gl_context *ctx, struct 
gl_program *program, GLuint texUnit = ((GLuint *) values)[i]; /* check that the sampler (tex unit index) is legal */ - if (texUnit >= ctx->Const.MaxTextureImageUnits) { + if (texUnit >= ctx->Const.MaxCombinedTextureImageUnits) { _mesa_error(ctx, GL_INVALID_VALUE, "glUniform1(invalid sampler/tex unit index for '%s')", param->Name); From 47441956284b3e22df2ee8500667617d2880bfc8 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 4 Jul 2011 00:57:42 +0200 Subject: [PATCH 040/113] mesa: Use the Elements macro for the sampler index assert in validate_samplers(). This is probably nicer if the array size ever changes. NOTE: This is a candidate for the 7.11 branch. Signed-off-by: Henri Verbeet Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/shaderapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index cb02e430c78..8df25c3f988 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1050,7 +1050,7 @@ validate_samplers(const struct gl_program *prog, char *errMsg) gl_texture_index target; GLint sampler = _mesa_ffs(samplersUsed) - 1; assert(sampler >= 0); - assert(sampler < MAX_TEXTURE_IMAGE_UNITS); + assert(sampler < Elements(prog->SamplerUnits)); unit = prog->SamplerUnits[sampler]; target = prog->SamplerTargets[sampler]; if (targetUsed[unit] != -1 && targetUsed[unit] != (int) target) { From bfe284fd26e96b71c7cf46e6365b3697d68cde83 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 4 Jul 2011 00:57:42 +0200 Subject: [PATCH 041/113] mesa: Fix a couple of TexEnv unit limits. NOTE: This is a candidate for the 7.11 branch. 
Signed-off-by: Henri Verbeet Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/texenv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c index 9228e354a4d..c0d0f3779b2 100644 --- a/src/mesa/main/texenv.c +++ b/src/mesa/main/texenv.c @@ -419,7 +419,7 @@ _mesa_TexEnvfv( GLenum target, GLenum pname, const GLfloat *param ) ASSERT_OUTSIDE_BEGIN_END(ctx); maxUnit = (target == GL_POINT_SPRITE_NV && pname == GL_COORD_REPLACE_NV) - ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxTextureImageUnits; + ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxCombinedTextureImageUnits; if (ctx->Texture.CurrentUnit >= maxUnit) { _mesa_error(ctx, GL_INVALID_OPERATION, "glTexEnvfv(current unit)"); return; @@ -748,7 +748,7 @@ _mesa_GetTexEnvfv( GLenum target, GLenum pname, GLfloat *params ) ASSERT_OUTSIDE_BEGIN_END(ctx); maxUnit = (target == GL_POINT_SPRITE_NV && pname == GL_COORD_REPLACE_NV) - ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxTextureImageUnits; + ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxCombinedTextureImageUnits; if (ctx->Texture.CurrentUnit >= maxUnit) { _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTexEnvfv(current unit)"); return; @@ -817,7 +817,7 @@ _mesa_GetTexEnviv( GLenum target, GLenum pname, GLint *params ) ASSERT_OUTSIDE_BEGIN_END(ctx); maxUnit = (target == GL_POINT_SPRITE_NV && pname == GL_COORD_REPLACE_NV) - ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxTextureImageUnits; + ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxCombinedTextureImageUnits; if (ctx->Texture.CurrentUnit >= maxUnit) { _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTexEnviv(current unit)"); return; From e01e30b916dcbe01471a573ecaddb509cb9f0969 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 4 Jul 2011 00:57:43 +0200 Subject: [PATCH 042/113] mesa: Fix the BindSampler unit limit. I'm not sure about this one. 
The current code actually follows the spec, but considering the spec is supposed to be written against GL 3.2 I'd say the spec is broken. I filled out a spec feedback form over a month ago, but either the form is broken, or nobody cares. Signed-off-by: Henri Verbeet Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/samplerobj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index f7774fdd7cb..8f8d87b90e8 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -251,7 +251,7 @@ _mesa_BindSampler(GLuint unit, GLuint sampler) struct gl_sampler_object *sampObj; GET_CURRENT_CONTEXT(ctx); - if (unit >= ctx->Const.MaxTextureImageUnits) { + if (unit >= ctx->Const.MaxCombinedTextureImageUnits) { _mesa_error(ctx, GL_INVALID_VALUE, "glBindSampler(unit %u)", unit); return; } From a68c5e6b71ac9abf70eb443637d4e79ffba66595 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 29 Jun 2011 23:50:05 -0700 Subject: [PATCH 043/113] i965: Convert 3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP to OUT_BATCH style. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_misc_state.c | 15 +++++---------- src/mesa/drivers/dri/i965/brw_structs.h | 9 --------- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 033c77cd321..6e2620ae9f9 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -621,16 +621,11 @@ static void upload_invarient_state( struct brw_context *brw ) } if (intel->gen < 6) { - struct brw_global_depth_offset_clamp gdo; - memset(&gdo, 0, sizeof(gdo)); - - /* Disable depth offset clamping. 
- */ - gdo.header.opcode = _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP; - gdo.header.length = sizeof(gdo)/4 - 2; - gdo.depth_offset_clamp = 0.0; - - BRW_BATCH_STRUCT(brw, &gdo); + /* Disable depth offset clamping. */ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2)); + OUT_BATCH_F(0.0); + ADVANCE_BATCH(); } if (intel->gen >= 6) { diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 7b9cdba4cbf..1b9644f4776 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -225,15 +225,6 @@ struct brw_drawrect GLuint yorg:16; }; - - - -struct brw_global_depth_offset_clamp -{ - struct header header; - GLfloat depth_offset_clamp; -}; - struct brw_indexbuffer { union { From 473a519d20c97b54504ff61aaedc0665529c14b7 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 30 Jun 2011 00:00:36 -0700 Subject: [PATCH 044/113] i965: Emit 3DSTATE_VF_STATISTICS in OUT_BATCH style. This is a little different from most because it's a single DWord; there's no length field. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_misc_state.c | 14 ++++---------- src/mesa/drivers/dri/i965/brw_structs.h | 8 -------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 6e2620ae9f9..a2ee7a592e2 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -672,16 +672,10 @@ static void upload_invarient_state( struct brw_context *brw ) } - { - struct brw_vf_statistics vfs; - memset(&vfs, 0, sizeof(vfs)); - - vfs.opcode = brw->CMD_VF_STATISTICS; - if (unlikely(INTEL_DEBUG & DEBUG_STATS)) - vfs.statistics_enable = 1; - - BRW_BATCH_STRUCT(brw, &vfs); - } + BEGIN_BATCH(1); + OUT_BATCH(brw->CMD_VF_STATISTICS << 16 | + (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 
1 : 0)); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_invarient_state = { diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 1b9644f4776..d40f8f7a5cc 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -95,14 +95,6 @@ struct brw_mi_flush GLuint opcode:16; }; -struct brw_vf_statistics -{ - GLuint statistics_enable:1; - GLuint pad:15; - GLuint opcode:16; -}; - - struct brw_binding_table_pointers { From 87de78523ae96737a39267aaa135ddabff05f2f2 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 30 Jun 2011 00:05:49 -0700 Subject: [PATCH 045/113] i965: Convert PIPELINE_SELECT to OUT_BATCH style. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_misc_state.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index a2ee7a592e2..fcbd97b44c8 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -609,16 +609,10 @@ static void upload_invarient_state( struct brw_context *brw ) if (intel->gen == 6) intel_emit_post_sync_nonzero_flush(intel); - { - /* 0x61040000 Pipeline Select */ - /* PipelineSelect : 0 */ - struct brw_pipeline_select ps; - - memset(&ps, 0, sizeof(ps)); - ps.header.opcode = brw->CMD_PIPELINE_SELECT; - ps.header.pipeline_select = 0; - BRW_BATCH_STRUCT(brw, &ps); - } + /* Select the 3D pipeline (as opposed to media) */ + BEGIN_BATCH(1); + OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0); + ADVANCE_BATCH(); if (intel->gen < 6) { /* Disable depth offset clamping. */ From 247e4c69ce6389805437256524d043019abfa6d1 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 30 Jun 2011 00:11:39 -0700 Subject: [PATCH 046/113] i965: Convert system instruction pointer to OUT_BATCH style. 
Also rename it from CMD_STATE_INSN_POINTER to CMD_STATE_SIP to match the documentation. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_defines.h | 2 +- src/mesa/drivers/dri/i965/brw_misc_state.c | 17 ++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6d41b1e69d3..a75171da316 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -867,7 +867,7 @@ #define CMD_CONST_BUFFER 0x6002 #define CMD_STATE_BASE_ADDRESS 0x6101 -#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_STATE_SIP 0x6102 #define CMD_PIPELINE_SELECT_965 0x6104 #define CMD_PIPELINE_SELECT_GM45 0x6904 diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index fcbd97b44c8..bc8ef783ef5 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -652,19 +652,10 @@ static void upload_invarient_state( struct brw_context *brw ) } } - /* 0x61020000 State Instruction Pointer */ - { - struct brw_system_instruction_pointer sip; - memset(&sip, 0, sizeof(sip)); - - sip.header.opcode = CMD_STATE_INSN_POINTER; - sip.header.length = 0; - sip.bits0.pad = 0; - sip.bits0.system_instruction_pointer = 0; - - BRW_BATCH_STRUCT(brw, &sip); - } - + BEGIN_BATCH(2); + OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); BEGIN_BATCH(1); OUT_BATCH(brw->CMD_VF_STATISTICS << 16 | From 3de9405763ad4b9e78577699ec206be7dda03374 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 30 Jun 2011 00:15:34 -0700 Subject: [PATCH 047/113] i965: Remove unused structures for command packets. We simply emit these using OUT_BATCH and bitshifting, as it results in better compiled code than packed structures. Since our documentation is public, it's not terribly useful to keep these around for reference. 
Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_structs.h | 433 ------------------------ 1 file changed, 433 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index d40f8f7a5cc..e1947d5ea64 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -40,46 +40,6 @@ /** Number of message register file registers */ #define BRW_MAX_MRF 16 - -/* Command packets: - */ -struct header -{ - GLuint length:16; - GLuint opcode:16; -}; - - -union header_union -{ - struct header bits; - GLuint dword; -}; - -struct brw_3d_control -{ - struct - { - GLuint length:8; - GLuint notify_enable:1; - GLuint pad:3; - GLuint wc_flush_enable:1; - GLuint depth_stall_enable:1; - GLuint operation:2; - GLuint opcode:16; - } header; - - struct - { - GLuint pad:2; - GLuint dest_addr_type:1; - GLuint dest_addr:29; - } dest; - - GLuint dword2; - GLuint dword3; -}; - /* These seem to be passed around as function args, so it works out * better to keep them as #defines: */ @@ -88,297 +48,6 @@ struct brw_3d_control #define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 #define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 -struct brw_mi_flush -{ - GLuint flags:4; - GLuint pad:12; - GLuint opcode:16; -}; - - -struct brw_binding_table_pointers -{ - struct header header; - GLuint vs; - GLuint gs; - GLuint clp; - GLuint sf; - GLuint wm; -}; - - -struct brw_blend_constant_color -{ - struct header header; - GLfloat blend_constant_color[4]; -}; - - -struct brw_depthbuffer -{ - union header_union header; - - union { - struct { - GLuint pitch:18; - GLuint format:3; - GLuint pad:2; - GLuint software_tiled_rendering_mode:2; - GLuint depth_offset_disable:1; - GLuint tile_walk:1; - GLuint tiled_surface:1; - GLuint pad2:1; - GLuint surface_type:3; - } bits; - GLuint dword; - } dword1; - - GLuint dword2_base_addr; - - union { - struct { - GLuint pad:1; - GLuint mipmap_layout:1; - GLuint lod:4; - GLuint 
width:13; - GLuint height:13; - } bits; - GLuint dword; - } dword3; - - union { - struct { - GLuint pad:10; - GLuint min_array_element:11; - GLuint depth:11; - } bits; - GLuint dword; - } dword4; -}; - -struct brw_depthbuffer_g4x -{ - union header_union header; - - union { - struct { - GLuint pitch:18; - GLuint format:3; - GLuint pad:2; - GLuint software_tiled_rendering_mode:2; - GLuint depth_offset_disable:1; - GLuint tile_walk:1; - GLuint tiled_surface:1; - GLuint pad2:1; - GLuint surface_type:3; - } bits; - GLuint dword; - } dword1; - - GLuint dword2_base_addr; - - union { - struct { - GLuint pad:1; - GLuint mipmap_layout:1; - GLuint lod:4; - GLuint width:13; - GLuint height:13; - } bits; - GLuint dword; - } dword3; - - union { - struct { - GLuint pad:10; - GLuint min_array_element:11; - GLuint depth:11; - } bits; - GLuint dword; - } dword4; - - union { - struct { - GLuint xoffset:16; - GLuint yoffset:16; - } bits; - GLuint dword; - } dword5; /* NEW in Integrated Graphics Device */ -}; - -struct brw_drawrect -{ - struct header header; - GLuint xmin:16; - GLuint ymin:16; - GLuint xmax:16; - GLuint ymax:16; - GLuint xorg:16; - GLuint yorg:16; -}; - -struct brw_indexbuffer -{ - union { - struct - { - GLuint length:8; - GLuint index_format:2; - GLuint cut_index_enable:1; - GLuint pad:5; - GLuint opcode:16; - } bits; - GLuint dword; - - } header; - - GLuint buffer_start; - GLuint buffer_end; -}; - -/* NEW in Integrated Graphics Device */ -struct brw_aa_line_parameters -{ - struct header header; - - struct { - GLuint aa_coverage_slope:8; - GLuint pad0:8; - GLuint aa_coverage_bias:8; - GLuint pad1:8; - } bits0; - - struct { - GLuint aa_coverage_endcap_slope:8; - GLuint pad0:8; - GLuint aa_coverage_endcap_bias:8; - GLuint pad1:8; - } bits1; -}; - -struct brw_line_stipple -{ - struct header header; - - struct - { - GLuint pattern:16; - GLuint pad:16; - } bits0; - - struct - { - GLuint repeat_count:9; - GLuint pad:7; - GLuint inverse_repeat_count:16; - } bits1; -}; - - 
-struct brw_pipelined_state_pointers -{ - struct header header; - - struct { - GLuint pad:5; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ - } vs; - - struct - { - GLuint enable:1; - GLuint pad:4; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ - } gs; - - struct - { - GLuint enable:1; - GLuint pad:4; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ - } clp; - - struct - { - GLuint pad:5; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ - } sf; - - struct - { - GLuint pad:5; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ - } wm; - - struct - { - GLuint pad:5; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */ - } cc; -}; - - -struct brw_polygon_stipple_offset -{ - struct header header; - - struct { - GLuint y_offset:5; - GLuint pad:3; - GLuint x_offset:5; - GLuint pad0:19; - } bits0; -}; - - - -struct brw_polygon_stipple -{ - struct header header; - GLuint stipple[32]; -}; - - - -struct brw_pipeline_select -{ - struct - { - GLuint pipeline_select:1; - GLuint pad:15; - GLuint opcode:16; - } header; -}; - - -struct brw_pipe_control -{ - struct - { - GLuint length:8; - GLuint notify_enable:1; - GLuint texture_cache_flush_enable:1; - GLuint indirect_state_pointers_disable:1; - GLuint instruction_state_cache_flush_enable:1; - GLuint write_cache_flush_enable:1; - GLuint depth_stall_enable:1; - GLuint post_sync_operation:2; - - GLuint opcode:16; - } header; - - struct - { - GLuint pad:2; - GLuint dest_addr_type:1; - GLuint dest_addr:29; - } bits1; - - GLuint data0; - GLuint data1; -}; - - struct brw_urb_fence { struct @@ -411,102 +80,6 @@ struct brw_urb_fence } bits1; }; -struct brw_cs_urb_state -{ - struct header header; - - struct - { - GLuint nr_urb_entries:3; - GLuint pad:1; - GLuint urb_entry_size:5; - GLuint pad0:23; - } bits0; -}; - -struct brw_constant_buffer -{ - struct - { - GLuint length:8; - GLuint valid:1; - GLuint pad:7; - GLuint opcode:16; - } header; - - struct - { - GLuint buffer_length:6; 
- GLuint buffer_address:26; - } bits0; -}; - -struct brw_state_base_address -{ - struct header header; - - struct - { - GLuint modify_enable:1; - GLuint pad:4; - GLuint general_state_address:27; - } bits0; - - struct - { - GLuint modify_enable:1; - GLuint pad:4; - GLuint surface_state_address:27; - } bits1; - - struct - { - GLuint modify_enable:1; - GLuint pad:4; - GLuint indirect_object_state_address:27; - } bits2; - - struct - { - GLuint modify_enable:1; - GLuint pad:11; - GLuint general_state_upper_bound:20; - } bits3; - - struct - { - GLuint modify_enable:1; - GLuint pad:11; - GLuint indirect_object_state_upper_bound:20; - } bits4; -}; - -struct brw_state_prefetch -{ - struct header header; - - struct - { - GLuint prefetch_count:3; - GLuint pad:3; - GLuint prefetch_pointer:26; - } bits0; -}; - -struct brw_system_instruction_pointer -{ - struct header header; - - struct - { - GLuint pad:4; - GLuint system_instruction_pointer:28; - } bits0; -}; - - - - /* State structs for the various fixed function units: */ @@ -1310,12 +883,6 @@ struct brw_vertex_element_state #define BRW_VEP_MAX 18 -struct brw_vertex_element_packet { - struct header header; - struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ -}; - - struct brw_urb_immediate { GLuint opcode:4; GLuint offset:6; From 86e62b2357447b7c97f434be4834f4b50aa0764d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 15 Jun 2011 12:02:12 -0700 Subject: [PATCH 048/113] intel: Mark MESA_FORMAT_X8_Z24 as always supported. This prevents developer surprise at seeing a GL_DEPTH_COMPONENT texture have stencil bits, and avoids the metaops path accidentally copying stencil bits around in glCopyTexImage(GL_DEPTH_COMPONENT) (and being broken because swrast's glReadPixels(GL_UNSIGNED_INT_24_8) is broken). 
Acked-by: Chad Versace --- src/mesa/drivers/dri/intel/intel_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 70aee52bd14..547d81b9351 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -661,7 +661,7 @@ intelInitContext(struct intel_context *intel, /* Depth and stencil */ ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = GL_TRUE; - ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = intel->has_separate_stencil; + ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = GL_TRUE; ctx->TextureFormatSupported[MESA_FORMAT_S8] = intel->has_separate_stencil; /* From 036b74a7f8adc745c7af089129f070b8e5b8f4bd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jun 2011 11:09:49 -0700 Subject: [PATCH 049/113] intel: Allow intel_region_reference() with *dst != NULL. This should help us avoid leaking regions in region reference code by making the API more predictable. Reviewed-by: Chad Versace --- src/mesa/drivers/dri/intel/intel_regions.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index a4da1ce4fa5..6cf37c4c40c 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -264,11 +264,13 @@ intel_region_alloc_for_handle(struct intel_screen *screen, void intel_region_reference(struct intel_region **dst, struct intel_region *src) { - if (src) - _DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); + _DBG("%s: %p(%d) -> %p(%d)\n", __FUNCTION__, + *dst, *dst ? (*dst)->refcount : 0, src, src ? 
src->refcount : 0); + + if (src != *dst) { + if (*dst) + intel_region_release(dst); - assert(*dst == NULL); - if (src) { src->refcount++; *dst = src; } From b904321ed018c661271fb1fc3eefd1af0ec61c7f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jun 2011 11:14:23 -0700 Subject: [PATCH 050/113] intel: Rely on intel_region_reference()'s support of *dst != NULL. Reviewed-by: Chad Versace --- src/mesa/drivers/dri/i915/i830_vtbl.c | 2 -- src/mesa/drivers/dri/i915/i915_vtbl.c | 2 -- src/mesa/drivers/dri/intel/intel_fbo.c | 13 ------------- src/mesa/drivers/dri/intel/intel_screen.c | 1 - 4 files changed, 18 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 584df82b50c..7775e71381f 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -618,11 +618,9 @@ i830_set_draw_region(struct intel_context *intel, uint32_t draw_x, draw_y; if (state->draw_region != color_regions[0]) { - intel_region_release(&state->draw_region); intel_region_reference(&state->draw_region, color_regions[0]); } if (state->depth_region != depth_region) { - intel_region_release(&state->depth_region); intel_region_reference(&state->depth_region, depth_region); } diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 9721a1c0e4d..cd7d108222e 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -570,11 +570,9 @@ i915_set_draw_region(struct intel_context *intel, uint32_t draw_x, draw_y, draw_offset; if (state->draw_region != color_regions[0]) { - intel_region_release(&state->draw_region); intel_region_reference(&state->draw_region, color_regions[0]); } if (state->depth_region != depth_region) { - intel_region_release(&state->depth_region); intel_region_reference(&state->depth_region, depth_region); } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 
90c3909d1d8..ee656edae5b 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -277,8 +277,6 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, return; irb = intel_renderbuffer(rb); - if (irb->region) - intel_region_release(&irb->region); intel_region_reference(&irb->region, image->region); rb->InternalFormat = image->internal_format; @@ -351,12 +349,7 @@ intel_renderbuffer_set_region(struct intel_context *intel, struct intel_renderbuffer *rb, struct intel_region *region) { - struct intel_region *old; - - old = rb->region; - rb->region = NULL; intel_region_reference(&rb->region, region); - intel_region_release(&old); } @@ -365,10 +358,7 @@ intel_renderbuffer_set_hiz_region(struct intel_context *intel, struct intel_renderbuffer *rb, struct intel_region *region) { - struct intel_region *old = rb->hiz_region; - rb->hiz_region = NULL; intel_region_reference(&rb->hiz_region, region); - intel_region_release(&old); } @@ -572,7 +562,6 @@ intel_update_tex_wrapper_regions(struct intel_context *intel, /* Point the renderbuffer's region to the texture's region. */ if (irb->region != intel_image->mt->region) { - intel_region_release(&irb->region); intel_region_reference(&irb->region, intel_image->mt->region); } @@ -592,7 +581,6 @@ intel_update_tex_wrapper_regions(struct intel_context *intel, /* Point the renderbuffer's hiz region to the texture's hiz region. 
*/ if (irb->hiz_region != intel_image->mt->hiz_region) { - intel_region_release(&irb->hiz_region); intel_region_reference(&irb->hiz_region, intel_image->mt->hiz_region); } @@ -770,7 +758,6 @@ intel_render_texture(struct gl_context * ctx, intel_image->mt = new_mt; intel_renderbuffer_set_draw_offset(irb, intel_image, att->Zoffset); - intel_region_release(&irb->region); intel_region_reference(&irb->region, intel_image->mt->region); } #endif diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 2a3a601ddba..bd8d574a29e 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -291,7 +291,6 @@ intel_dup_image(__DRIimage *orig_image, void *loaderPrivate) if (image == NULL) return NULL; - image->region = NULL; intel_region_reference(&image->region, orig_image->region); if (image->region == NULL) { FREE(image); From c7ef5e8498550e6ed4d609641ca6deb932882485 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jun 2011 11:26:32 -0700 Subject: [PATCH 051/113] intel: Remove now trivial intel_renderbuffer_set_{hiz_,}region(). As a result of this cleanup, a bug in intel_process_dri2_buffer_no_separate_stencil() became quite apparent. We were associating the NULL pointer after an unreference with the STENCIL attachment -- clarify the logic and attach the right region. 
Reviewed-by: Chad Versace --- src/mesa/drivers/dri/intel/intel_context.c | 45 +++++++++------------- src/mesa/drivers/dri/intel/intel_fbo.c | 19 --------- src/mesa/drivers/dri/intel/intel_fbo.h | 12 ------ 3 files changed, 19 insertions(+), 57 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 547d81b9351..0402d83e9e3 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -1112,7 +1112,6 @@ intel_query_dri2_buffers_no_separate_stencil(struct intel_context *intel, * * \see intel_update_renderbuffers() * \see intel_region_alloc_for_handle() - * \see intel_renderbuffer_set_region() */ static void intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, @@ -1124,7 +1123,6 @@ intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, assert(!intel->must_use_separate_stencil); struct gl_framebuffer *fb = drawable->driverPrivate; - struct intel_region *region = NULL; struct intel_renderbuffer *depth_rb = NULL; if (!rb) @@ -1151,20 +1149,18 @@ intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, if (unlikely(INTEL_DEBUG & DEBUG_DRI)) { fprintf(stderr, "(reusing depth buffer as stencil)\n"); } - intel_region_reference(&region, depth_rb->region); + intel_region_reference(&rb->region, depth_rb->region); } else { - region = intel_region_alloc_for_handle(intel->intelScreen, - buffer->cpp, - drawable->w, - drawable->h, - buffer->pitch / buffer->cpp, - buffer->name, - buffer_name); + intel_region_release(&rb->region); + rb->region = intel_region_alloc_for_handle(intel->intelScreen, + buffer->cpp, + drawable->w, + drawable->h, + buffer->pitch / buffer->cpp, + buffer->name, + buffer_name); } - intel_renderbuffer_set_region(intel, rb, region); - intel_region_release(&region); - if (buffer->attachment == __DRI_BUFFER_DEPTH_STENCIL) { struct intel_renderbuffer *stencil_rb = intel_get_renderbuffer(fb, 
BUFFER_STENCIL); @@ -1172,10 +1168,10 @@ intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, if (!stencil_rb) return; - if (stencil_rb->region && stencil_rb->region->name == buffer->name) - return; - - intel_renderbuffer_set_region(intel, stencil_rb, region); + /* The rb passed in is the BUFFER_DEPTH attachment, and we need + * to associate this region to BUFFER_STENCIL as well. + */ + intel_region_reference(&stencil_rb->region, rb->region); } } @@ -1300,7 +1296,6 @@ intel_query_dri2_buffers_with_separate_stencil(struct intel_context *intel, * * \see intel_update_renderbuffers() * \see intel_region_alloc_for_handle() - * \see intel_renderbuffer_set_region() * \see enum intel_dri2_has_hiz */ static void @@ -1360,9 +1355,9 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel, buffer_name); if (buffer->attachment == __DRI_BUFFER_HIZ) { - intel_renderbuffer_set_hiz_region(intel, rb, region); + intel_region_reference(&rb->hiz_region, region); } else { - intel_renderbuffer_set_region(intel, rb, region); + intel_region_reference(&rb->region, region); } intel_region_release(&region); @@ -1511,12 +1506,10 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, / depth_stencil_buffer->cpp, depth_stencil_buffer->name, "dri2 depth / stencil buffer"); - intel_renderbuffer_set_region(intel, - intel_get_renderbuffer(fb, BUFFER_DEPTH), - region); - intel_renderbuffer_set_region(intel, - intel_get_renderbuffer(fb, BUFFER_STENCIL), - region); + intel_region_reference(&intel_get_renderbuffer(fb, BUFFER_DEPTH)->region, + region); + intel_region_reference(&intel_get_renderbuffer(fb, BUFFER_STENCIL)->region, + region); intel_region_release(&region); } } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index ee656edae5b..12460020090 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -343,25 +343,6 @@ intel_nop_alloc_storage(struct gl_context * ctx, 
struct gl_renderbuffer *rb, return GL_FALSE; } - -void -intel_renderbuffer_set_region(struct intel_context *intel, - struct intel_renderbuffer *rb, - struct intel_region *region) -{ - intel_region_reference(&rb->region, region); -} - - -void -intel_renderbuffer_set_hiz_region(struct intel_context *intel, - struct intel_renderbuffer *rb, - struct intel_region *region) -{ - intel_region_reference(&rb->hiz_region, region); -} - - /** * Create a new intel_renderbuffer which corresponds to an on-screen window, * not a user-created renderbuffer. diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index cbf29c86257..f7f99a4f00c 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -155,18 +155,6 @@ intel_framebuffer_has_hiz(struct gl_framebuffer *fb) return intel_framebuffer_get_hiz_region(fb) != NULL; } - -extern void -intel_renderbuffer_set_region(struct intel_context *intel, - struct intel_renderbuffer *irb, - struct intel_region *region); - -extern void -intel_renderbuffer_set_hiz_region(struct intel_context *intel, - struct intel_renderbuffer *rb, - struct intel_region *region); - - extern struct intel_renderbuffer * intel_create_renderbuffer(gl_format format); From 007c2d6cd2f6b206564689ac12a3e51aaae242bc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Jun 2011 11:37:35 -0700 Subject: [PATCH 052/113] intel: Remove gratuitous context checks in intel_delete_renderbuffer(). Even if we don't have a current context, if we're freeing the rb we should free its region (and BO). The renderbuffer unreference checks appear to be just cargo-cult from the region unreference code. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=30217 Reviewed-by: Chad Versace --- src/mesa/drivers/dri/intel/intel_fbo.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 12460020090..1669af2c2a7 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -70,24 +70,15 @@ intel_new_framebuffer(struct gl_context * ctx, GLuint name) static void intel_delete_renderbuffer(struct gl_renderbuffer *rb) { - GET_CURRENT_CONTEXT(ctx); - struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *irb = intel_renderbuffer(rb); ASSERT(irb); - if (intel && irb->region) { - intel_region_release(&irb->region); - } - if (intel && irb->hiz_region) { - intel_region_release(&irb->hiz_region); - } - if (intel && irb->wrapped_depth) { - _mesa_reference_renderbuffer(&irb->wrapped_depth, NULL); - } - if (intel && irb->wrapped_stencil) { - _mesa_reference_renderbuffer(&irb->wrapped_stencil, NULL); - } + intel_region_release(&irb->region); + intel_region_release(&irb->hiz_region); + + _mesa_reference_renderbuffer(&irb->wrapped_depth, NULL); + _mesa_reference_renderbuffer(&irb->wrapped_stencil, NULL); free(irb); } From f39476b234960cd0fb794a43a3eece440367cba2 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 29 Jun 2011 11:45:04 -0700 Subject: [PATCH 053/113] i915: Fix leak of ViewportMatrix data on context destroy. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=30217 --- src/mesa/drivers/dri/intel/intel_context.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 0402d83e9e3..292b7b034ee 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -922,6 +922,8 @@ intelDestroyContext(__DRIcontext * driContextPriv) /* free the Mesa context */ _mesa_free_context_data(&intel->ctx); + _math_matrix_dtr(&intel->ViewportMatrix); + FREE(intel); driContextPriv->driverPrivate = NULL; } From a7a2704ab48edb4853060b4a292d9388eb909e61 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 7 Jul 2011 08:59:07 -0700 Subject: [PATCH 054/113] intel: Remove dead comment about software clears -- it's handled just above. --- src/mesa/drivers/dri/intel/intel_clear.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 5a96232107e..dfca03c14bf 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -118,7 +118,6 @@ intelClear(struct gl_context *ctx, GLbitfield mask) /* HW color buffers (front, back, aux, generic FBO, etc) */ if (colorMask == ~0) { /* clear all R,G,B,A */ - /* XXX FBO: need to check if colorbuffers are software RBOs! */ blit_mask |= (mask & BUFFER_BITS_COLOR); } else { From 066bee64e1611093c7e641ba77bbd43f70d08cec Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 6 Jul 2011 11:31:00 -0700 Subject: [PATCH 055/113] intel: Fix use of freed buffer if glBitmap is called after a swap. Regions looked up from the framebuffer are invalid after intel_prepare_render(). 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=30266 Tested-by: Thomas Jones --- src/mesa/drivers/dri/intel/intel_pixel_bitmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 64c7acce1e9..86d0ef2d748 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -175,7 +175,7 @@ do_blit_bitmap( struct gl_context *ctx, const GLubyte *bitmap ) { struct intel_context *intel = intel_context(ctx); - struct intel_region *dst = intel_drawbuf_region(intel); + struct intel_region *dst; struct gl_framebuffer *fb = ctx->DrawBuffer; GLfloat tmpColor[4]; GLubyte ubcolor[4]; @@ -198,6 +198,9 @@ do_blit_bitmap( struct gl_context *ctx, return GL_FALSE; } + intel_prepare_render(intel); + dst = intel_drawbuf_region(intel); + if (!dst) return GL_FALSE; @@ -226,8 +229,6 @@ do_blit_bitmap( struct gl_context *ctx, if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F)) return GL_FALSE; - intel_prepare_render(intel); - /* Clip to buffer bounds and scissor. */ if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, fb->_Xmax, fb->_Ymax, From 61efad6865978b641cdacc4c087c2e623b1eab11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Thu, 7 Jul 2011 23:41:06 +0100 Subject: [PATCH 056/113] scons: Generate libGL.so.1.5 and libGL.so.1 symlinks. 
In build/xxx/src/gallium/targets/libgl-xlib/SConscript --- src/gallium/targets/libgl-xlib/SConscript | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript index 7d5d9bc47a9..ad8b0992e46 100644 --- a/src/gallium/targets/libgl-xlib/SConscript +++ b/src/gallium/targets/libgl-xlib/SConscript @@ -48,14 +48,17 @@ if False: env.Append(CPPDEFINES = 'GALLIUM_CELL') env.Prepend(LIBS = [cell]) -# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions -libgl = env.SharedLibrary( +# libGL.so.1.5 +libgl_1_5 = env.SharedLibrary( target ='GL', source = sources, + SHLIBSUFFIX = env['SHLIBSUFFIX'] + '.1.5', ) -if False: - # XXX: Only install this libGL.so if DRI not enabled - libgl = env.InstallSharedLibrary(libgl, version=(1, 5)) +# libGL.so.1 +libgl = env.subst('${SHLIBPREFIX}GL${SHLIBSUFFIX}') +libgl_1 = libgl + '.1' +env.Command(libgl_1, libgl_1_5, "ln -sf ${SOURCE.file} ${TARGET}") +env.Command(libgl, libgl_1, "ln -sf ${SOURCE.file} ${TARGET}") env.Alias('libgl-xlib', libgl) From 7eb7d67d50fccb64248d1fc6f490895048d7d32e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 7 Jul 2011 16:47:59 -0600 Subject: [PATCH 057/113] glsl: use casts to silence warning --- src/glsl/linker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 265da84e5a9..34b64837a46 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1248,7 +1248,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog, */ const int generic_base = (target_index == MESA_SHADER_VERTEX) - ? VERT_ATTRIB_GENERIC0 : FRAG_RESULT_DATA0; + ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0; const enum ir_variable_mode direction = (target_index == MESA_SHADER_VERTEX) ? 
ir_var_in : ir_var_out; From d8f65c07e9f3a5948c8bee95482bcab651b33c01 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 7 Jul 2011 17:29:28 -0600 Subject: [PATCH 058/113] intel: add null src pointer check in intel_region_reference() Fixes segfault when running cubemap demo on i945. This happened when intel_region_reference() was called in i915_set_draw_region() with depth_region=NULL. Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/intel/intel_regions.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 6cf37c4c40c..4c4945c7941 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -271,7 +271,8 @@ intel_region_reference(struct intel_region **dst, struct intel_region *src) if (*dst) intel_region_release(dst); - src->refcount++; + if (src) + src->refcount++; *dst = src; } } From 8c1a2e128ea44bae6dc89524116c4a170cdc2b37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Thu, 7 Jul 2011 21:58:31 -0700 Subject: [PATCH 059/113] i915g: Make the optimizer more generic. 
--- src/gallium/drivers/i915/i915_fpc_optimize.c | 85 +++++++++++++------- 1 file changed, 58 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index e60c27e5473..2b739e9ccb8 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -55,18 +55,15 @@ static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_ } static boolean is_unswizzled(struct i915_full_src_register* r, - int sx, - int sy, - int sz, - int sw) + unsigned write_mask) { - if (sx && r->Register.SwizzleX != TGSI_SWIZZLE_X) + if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) return FALSE; - if (sy && r->Register.SwizzleY != TGSI_SWIZZLE_Y) + if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y) return FALSE; - if (sz && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) + if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) return FALSE; - if (sw && r->Register.SwizzleW != TGSI_SWIZZLE_W) + if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W) return FALSE; return TRUE; } @@ -89,6 +86,35 @@ static unsigned op_neutral_element(unsigned opcode) return TGSI_SWIZZLE_ZERO; } +/* + * Sets the swizzle to the neutral element for the operation for the bits + * of writemask which are set, swizzle to identity otherwise. 
+ */ +static void set_neutral_element_swizzle(struct i915_full_src_register* r, + unsigned write_mask, + unsigned neutral) +{ + if ( write_mask & TGSI_WRITEMASK_X ) + r->Register.SwizzleX = neutral; + else + r->Register.SwizzleX = TGSI_SWIZZLE_X; + + if ( write_mask & TGSI_WRITEMASK_Y ) + r->Register.SwizzleY = neutral; + else + r->Register.SwizzleY = TGSI_SWIZZLE_Y; + + if ( write_mask & TGSI_WRITEMASK_Z ) + r->Register.SwizzleZ = neutral; + else + r->Register.SwizzleZ = TGSI_SWIZZLE_Z; + + if ( write_mask & TGSI_WRITEMASK_W ) + r->Register.SwizzleW = neutral; + else + r->Register.SwizzleW = TGSI_SWIZZLE_W; +} + /* * Optimize away things like: * MUL OUT[0].xyz, TEMP[1], TEMP[2] @@ -96,47 +122,52 @@ static unsigned op_neutral_element(unsigned opcode) * into: * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] * This is useful for optimizing texenv. - * XXX also handle swizzles other than XYZ/W */ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next) { if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && op_commutes(current->FullInstruction.Instruction.Opcode) && - current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && + current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && - current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && - next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) && - is_unswizzled(&current->FullInstruction.Src[0], 1, 1, 1, 0) && - is_unswizzled(&current->FullInstruction.Src[1], 1, 1, 1, 0) && - is_unswizzled(&next->FullInstruction.Src[0], 0, 0, 0, 1) ) + is_unswizzled(&current->FullInstruction.Src[0], 
current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) { next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; - current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; - current->FullInstruction.Src[0].Register.SwizzleW = op_neutral_element(current->FullInstruction.Instruction.Opcode); - current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; + + set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0); + set_neutral_element_swizzle(&current->FullInstruction.Src[0], + next->FullInstruction.Dst[0].Register.WriteMask, + op_neutral_element(current->FullInstruction.Instruction.Opcode)); + + current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | + next->FullInstruction.Dst[0].Register.WriteMask; return; } if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && op_commutes(current->FullInstruction.Instruction.Opcode) && - current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && + current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && - current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && - next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) && - is_unswizzled(&current->FullInstruction.Src[0], 1, 1, 1, 0) && - is_unswizzled(&current->FullInstruction.Src[1], 1, 1, 1, 0) && - is_unswizzled(&next->FullInstruction.Src[0], 0, 0, 0, 1) ) + is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) 
&& + is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) { next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; - current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; - current->FullInstruction.Src[1].Register.SwizzleW = op_neutral_element(current->FullInstruction.Instruction.Opcode); - current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W; + + set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0); + set_neutral_element_swizzle(&current->FullInstruction.Src[1], + next->FullInstruction.Dst[0].Register.WriteMask, + op_neutral_element(current->FullInstruction.Instruction.Opcode)); + + current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | + next->FullInstruction.Dst[0].Register.WriteMask; return; } } From a65e9706035c0e348307e76fdeeed0910ec8c68e Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 8 Jul 2011 08:26:29 +0200 Subject: [PATCH 060/113] mesa/st: Fix piglit read-front with new drawable invalidation v2 When the state tracker adds a front buffer, nothing triggers a validate drawable call, since the state tracker manager is never notified. Force a validate drawable call by invalidating the framebuffer's stamp, so that the window system's renderbuffer (if any) is picked up. 
This fixes bug 38988 https://bugs.freedesktop.org/show_bug.cgi?id=38988 Signed-off-by: Thomas Hellstrom --- src/mesa/state_tracker/st_manager.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index a8c4b5c3f49..7bd82aae206 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -918,6 +918,15 @@ st_manager_add_color_renderbuffer(struct st_context *st, return FALSE; st_framebuffer_update_attachments(stfb); + + /* + * Force a call to the state tracker manager to validate the + * new renderbuffer. It might be that there is a window system + * renderbuffer available. + */ + if(stfb->iface) + stfb->iface_stamp = p_atomic_read(&stfb->iface->stamp) - 1; + st_invalidate_state(st->ctx, _NEW_BUFFERS); return TRUE; From 292148dc4b18958d4447df7596311bd2f09fd44f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 27 Jun 2011 19:01:25 +0200 Subject: [PATCH 061/113] st/mesa: use the first non-VOID channel in st_format_datatype Otherwise PIPE_FORMAT_X8B8G8R8_UNORM and friends would fail. NOTE: This is a candidate for the 7.10 and 7.11 branches. Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_format.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index fa5d8f5050a..3260297c6c1 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -68,10 +68,18 @@ GLenum st_format_datatype(enum pipe_format format) { const struct util_format_description *desc; + int i; desc = util_format_description(format); assert(desc); + /* Find the first non-VOID channel. 
*/ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { if (format == PIPE_FORMAT_B5G5R5A1_UNORM || format == PIPE_FORMAT_B5G6R5_UNORM) { @@ -85,21 +93,26 @@ st_format_datatype(enum pipe_format format) } else { const GLuint size = format_max_bits(format); + + assert(i < 4); + if (i == 4) + return GL_NONE; + if (size == 8) { - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_BYTE; else return GL_BYTE; } else if (size == 16) { - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_SHORT; else return GL_SHORT; } else { assert( size <= 32 ); - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_INT; else return GL_INT; From 7de28e80dcd4239a780b0f5fdc6e61e6e56a68aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 27 Jun 2011 18:57:59 +0200 Subject: [PATCH 062/113] st/mesa: handle float formats in st_format_datatype NOTE: This is a candidate for the 7.11 branch. 
Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_format.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 3260297c6c1..d1995f1ee1d 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -85,6 +85,10 @@ st_format_datatype(enum pipe_format format) format == PIPE_FORMAT_B5G6R5_UNORM) { return GL_UNSIGNED_SHORT; } + else if (format == PIPE_FORMAT_R11G11B10_FLOAT || + format == PIPE_FORMAT_R9G9B9E5_FLOAT) { + return GL_FLOAT; + } else if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || format == PIPE_FORMAT_S8_USCALED_Z24_UNORM || format == PIPE_FORMAT_Z24X8_UNORM || @@ -105,18 +109,26 @@ st_format_datatype(enum pipe_format format) return GL_BYTE; } else if (size == 16) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + return GL_HALF_FLOAT; if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_SHORT; else return GL_SHORT; } - else { - assert( size <= 32 ); + else if (size <= 32) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + return GL_FLOAT; if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_INT; else return GL_INT; } + else { + assert(size == 64); + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT); + return GL_DOUBLE; + } } } else if (format == PIPE_FORMAT_UYVY) { From fc98444bd58960e6cab28423365923bc7e7af3e1 Mon Sep 17 00:00:00 2001 From: Gustaw Smolarczyk Date: Wed, 6 Jul 2011 23:12:11 +0200 Subject: [PATCH 063/113] gallivm: Fix build with llvm-3.0 LLVM 3.0svn changes pretty rapidly. The change in Target->createMCInstPrinter() signature which inspired commits 40ae214067673edbda79371969d1730b6194d83e and 92e29dc5b0474c073b0f05d60629fc6c3decfca4 has been reverted. 
Signed-off-by: Gustaw Smolarczyk Signed-off-by: Brian Paul --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 01e660ef7d9..29dfb868d95 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -207,21 +207,13 @@ lp_disassemble(const void* func) } raw_debug_ostream Out; -#if HAVE_LLVM >= 0x0300 - TargetMachine *TM = T->createTargetMachine(Triple, sys::getHostCPUName(), ""); -#else - TargetMachine *TM = T->createTargetMachine(Triple, ""); -#endif #if HAVE_LLVM >= 0x0300 unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); #else int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); #endif -#if HAVE_LLVM >= 0x0300 - OwningPtr Printer( - T->createMCInstPrinter(*TM, AsmPrinterVariant, *AsmInfo)); -#elif HAVE_LLVM >= 0x0208 +#if HAVE_LLVM >= 0x0208 OwningPtr Printer( T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo)); #else @@ -233,6 +225,12 @@ lp_disassemble(const void* func) return; } +#if HAVE_LLVM >= 0x0300 + TargetMachine *TM = T->createTargetMachine(Triple, sys::getHostCPUName(), ""); +#else + TargetMachine *TM = T->createTargetMachine(Triple, ""); +#endif + const TargetInstrInfo *TII = TM->getInstrInfo(); /* From 5fb79fc69f56cf2d8d44e4c6c2d8b862bc631139 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Tue, 5 Jul 2011 11:29:40 -0700 Subject: [PATCH 064/113] glsl: Remove unused function prototypes. No functional change. Remove prototypes for do_mod_to_fract() and do_sub_to_add_neg(), which haven't existed since November 2010. 
--- src/glsl/ir_optimization.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index dd265673c55..59a040751d9 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -56,10 +56,8 @@ bool do_if_simplification(exec_list *instructions); bool do_discard_simplification(exec_list *instructions); bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0); bool do_mat_op_to_vec(exec_list *instructions); -bool do_mod_to_fract(exec_list *instructions); bool do_noop_swizzle(exec_list *instructions); bool do_structure_splitting(exec_list *instructions); -bool do_sub_to_add_neg(exec_list *instructions); bool do_swizzle_swizzle(exec_list *instructions); bool do_tree_grafting(exec_list *instructions); bool do_vec_index_to_cond_assign(exec_list *instructions); From f4830be938c8fa33086f73cab19a53ab3e14cb9c Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 29 Jun 2011 15:30:40 -0700 Subject: [PATCH 065/113] glsl: Make ir_reader able to read plain (return) statements. Previously ir_reader was only able to handle return of non-void. This patch is necessary in order to allow optimization passes to be tested in isolation. 
Reviewed-by: Kenneth Graunke --- src/glsl/ir_reader.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp index 30df257be2f..f3a621734ba 100644 --- a/src/glsl/ir_reader.cpp +++ b/src/glsl/ir_reader.cpp @@ -482,19 +482,21 @@ ir_reader::read_return(s_expression *expr) { s_expression *s_retval; - s_pattern pat[] = { "return", s_retval}; - if (!MATCH(expr, pat)) { - ir_read_error(expr, "expected (return )"); + s_pattern return_value_pat[] = { "return", s_retval}; + s_pattern return_void_pat[] = { "return" }; + if (MATCH(expr, return_value_pat)) { + ir_rvalue *retval = read_rvalue(s_retval); + if (retval == NULL) { + ir_read_error(NULL, "when reading return value"); + return NULL; + } + return new(mem_ctx) ir_return(retval); + } else if (MATCH(expr, return_void_pat)) { + return new(mem_ctx) ir_return; + } else { + ir_read_error(expr, "expected (return ) or (return)"); return NULL; } - - ir_rvalue *retval = read_rvalue(s_retval); - if (retval == NULL) { - ir_read_error(NULL, "when reading return value"); - return NULL; - } - - return new(mem_ctx) ir_return(retval); } From e2c748aec5363981a05f21f26a0c4d37ccf6419d Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 29 Jun 2011 10:28:40 -0700 Subject: [PATCH 066/113] glsl: Add explanatory comments to lower_jumps.cpp. No functional change. Reviewed-by: Kenneth Graunke --- src/glsl/lower_jumps.cpp | 336 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 322 insertions(+), 14 deletions(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index dd2601d1aad..da85c6b49c0 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -60,12 +60,76 @@ #include #include "ir.h" +/** + * Enum recording the result of analyzing how control flow might exit + * an IR node. + * + * Each possible value of jump_strength indicates a strictly stronger + * guarantee on control flow than the previous value. 
+ * + * The ordering of strengths roughly reflects the way jumps are + * lowered: jumps with higher strength tend to be lowered to jumps of + * lower strength. Accordingly, strength is used as a heuristic to + * determine which lowering to perform first. + * + * This enum is also used by get_jump_strength() to categorize + * instructions as either break, continue, return, or other. When + * used in this fashion, strength_always_clears_execute_flag is not + * used. + * + * The control flow analysis made by this optimization pass makes two + * simplifying assumptions: + * + * - It ignores discard instructions, since they are lowered by a + * separate pass (lower_discard.cpp). + * + * - It assumes it is always possible for control to flow from a loop + * to the instruction immediately following it. Technically, this + * is not true (since all execution paths through the loop might + * jump back to the top, or return from the function). + * + * Both of these simplifying assumptions are safe, since they can never + * cause reachable code to be incorrectly classified as unreachable; + * they can only do the opposite. + */ enum jump_strength { + /** + * Analysis has produced no guarantee on how control flow might + * exit this IR node. It might fall out the bottom (with or + * without clearing the execute flag, if present), or it might + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. + */ strength_none, + + /** + * The only way control can fall out the bottom of this node is + * through a code path that clears the execute flag. It might also + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. + */ strength_always_clears_execute_flag, + + /** + * Control cannot fall out the bottom of this node. It might + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. 
+ */ strength_continue, + + /** + * Control cannot fall out the bottom of this node, or continue to the + * top of the innermost enclosing loop. It can only break out of + * it or return from the function. + */ strength_break, + + /** + * Control cannot fall out the bottom of this node, continue to the + * top of the innermost enclosing loop, or break out of it. It can + * only return from the function. + */ strength_return }; @@ -180,6 +244,27 @@ struct function_record }; struct ir_lower_jumps_visitor : public ir_control_flow_visitor { + /* Postconditions: on exit of any visit() function: + * + * ANALYSIS: this->block.min_strength, + * this->block.may_clear_execute_flag, and + * this->loop.may_set_return_flag are updated to reflect the + * characteristics of the visited statement. + * + * DEAD_CODE_ELIMINATION: If this->block.min_strength is not + * strength_none, the visited node is at the end of its exec_list. + * In other words, any unreachable statements that follow the + * visited statement in its exec_list have been removed. + * + * CONTAINED_JUMPS_LOWERED: If the visited statement contains other + * statements, then should_lower_jump() is false for all of the + * return, break, or continue statements it contains. + * + * Note that visiting a jump does not lower it. That is the + * responsibility of the statement (or function signature) that + * contains the jump. + */ + bool progress; struct function_record function; @@ -220,18 +305,57 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { virtual void visit(class ir_loop_jump * ir) { + /* Eliminate all instructions after each one, since they are + * unreachable. This satisfies the DEAD_CODE_ELIMINATION + * postcondition. + */ truncate_after_instruction(ir); + + /* Set this->block.min_strength based on this instruction. This + * satisfies the ANALYSIS postcondition. 
It is not necessary to + * update this->block.may_clear_execute_flag or + * this->loop.may_set_return_flag, because an unlowered jump + * instruction can't change any flags. + */ this->block.min_strength = ir->is_break() ? strength_break : strength_continue; + + /* The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because jump statements can't contain other + * statements. + */ } virtual void visit(class ir_return * ir) { + /* Eliminate all instructions after each one, since they are + * unreachable. This satisfies the DEAD_CODE_ELIMINATION + * postcondition. + */ truncate_after_instruction(ir); + + /* Set this->block.min_strength based on this instruction. This + * satisfies the ANALYSIS postcondition. It is not necessary to + * update this->block.may_clear_execute_flag or + * this->loop.may_set_return_flag, because an unlowered return + * instruction can't change any flags. + */ this->block.min_strength = strength_return; + + /* The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because jump statements can't contain other + * statements. + */ } virtual void visit(class ir_discard * ir) { + /* Nothing needs to be done. The ANALYSIS and + * DEAD_CODE_ELIMINATION postconditions are already satisfied, + * because discard statements are ignored by this optimization + * pass. The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because discard statements can't contain other + * statements. + */ } enum jump_strength get_jump_strength(ir_instruction* ir) @@ -304,18 +428,34 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { block_record block_records[2]; ir_jump* jumps[2]; + /* Recursively lower nested jumps. This satisfies the + * CONTAINED_JUMPS_LOWERED postcondition, except in the case of + * unconditional jumps at the end of ir->then_instructions and + * ir->else_instructions, which are handled below. 
+ */ block_records[0] = visit_block(&ir->then_instructions); block_records[1] = visit_block(&ir->else_instructions); retry: /* we get here if we put code after the if inside a branch */ - for(unsigned i = 0; i < 2; ++i) { - exec_list& list = i ? ir->else_instructions : ir->then_instructions; - jumps[i] = 0; - if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail())) - jumps[i] = (ir_jump*)list.get_tail(); - } + /* Determine which of ir->then_instructions and + * ir->else_instructions end with an unconditional jump. + */ + for(unsigned i = 0; i < 2; ++i) { + exec_list& list = i ? ir->else_instructions : ir->then_instructions; + jumps[i] = 0; + if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail())) + jumps[i] = (ir_jump*)list.get_tail(); + } + + /* Loop until we have satisfied the CONTAINED_JUMPS_LOWERED + * postcondition by lowering jumps in both then_instructions and + * else_instructions. + */ for(;;) { + /* Determine the types of the jumps that terminate + * ir->then_instructions and ir->else_instructions. + */ jump_strength jump_strengths[2]; for(unsigned i = 0; i < 2; ++i) { @@ -326,7 +466,12 @@ retry: /* we get here if we put code after the if inside a branch */ jump_strengths[i] = strength_none; } - /* move both jumps out if possible */ + /* If both code paths end in a jump, and the jumps are the + * same, and we are pulling out jumps, replace them with a + * single jump that comes after the if instruction. The new + * jump will be visited next, and it will be lowered if + * necessary by the loop or conditional that encloses it. 
+ */ if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) { bool unify = true; if(jump_strengths[0] == strength_continue) @@ -344,10 +489,19 @@ retry: /* we get here if we put code after the if inside a branch */ jumps[1]->remove(); this->progress = true; + /* Update jumps[] to reflect the fact that the jumps + * are gone, and update block_records[] to reflect the + * fact that control can now flow to the next + * instruction. + */ jumps[0] = 0; jumps[1] = 0; block_records[0].min_strength = strength_none; block_records[1].min_strength = strength_none; + + /* The CONTAINED_JUMPS_LOWERED postcondition is now + * satisfied, so we can break out of the loop. + */ break; } } @@ -367,9 +521,18 @@ retry: /* we get here if we put code after the if inside a branch */ else if(should_lower[1]) lower = 1; else + /* Neither code path ends in a jump that needs to be + * lowered, so the CONTAINED_JUMPS_LOWERED postcondition + * is satisfied and we can break out of the loop. + */ break; if(jump_strengths[lower] == strength_return) { + /* To lower a return, we create a return flag (if the + * function doesn't have one already) and add instructions + * that: 1. store the return value (if this function has a + * non-void return) and 2. set the return flag + */ ir_variable* return_flag = this->function.get_return_flag(); if(!this->function.signature->return_type->is_void()) { ir_variable* return_value = this->function.get_return_value(); @@ -378,29 +541,58 @@ retry: /* we get here if we put code after the if inside a branch */ jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_flag), new (ir) ir_constant(true), NULL)); this->loop.may_set_return_flag = true; if(this->loop.loop) { + /* If we are in a loop, replace the return instruction + * with a break instruction, and then loop so that the + * break instruction can be lowered if necessary. 
+ */ ir_loop_jump* lowered = 0; lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + /* Note: we must update block_records and jumps to + * reflect the fact that the control path has been + * altered from a return to a break. + */ block_records[lower].min_strength = strength_break; jumps[lower]->replace_with(lowered); jumps[lower] = lowered; - } else + } else { + /* If we are not in a loop, we then proceed as we would + * for a continue statement (set the execute flag to + * false to prevent the rest of the function from + * executing). + */ goto lower_continue; + } this->progress = true; } else if(jump_strengths[lower] == strength_break) { - /* We can't lower to an actual continue because that would execute the increment. + /* To lower a break, we create a break flag (if the loop + * doesn't have one already) and add an instruction that + * sets it. * - * In the lowered code, we instead put the break check between the this->loop body and the increment, - * which is impossible with a real continue as defined by the GLSL IR currently. + * Then we proceed as we would for a continue statement + * (set the execute flag to false to prevent the rest of + * the loop body from executing). * - * Smarter options (such as undoing the increment) are possible but it's not worth implementing them, - * because if break is lowered, continue is almost surely lowered too. + * The visit() function for the loop will ensure that the + * break flag is checked after executing the loop body. */ jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(this->loop.get_break_flag()), new (ir) ir_constant(true), 0)); goto lower_continue; } else if(jump_strengths[lower] == strength_continue) { lower_continue: + /* To lower a continue, we create an execute flag (if the + * loop doesn't have one already) and replace the continue + * with an instruction that clears it. 
+ * + * Note that this code path gets exercised when lowering + * return statements that are not inside a loop, so + * this->loop must be initialized even outside of loops. + */ ir_variable* execute_flag = this->loop.get_execute_flag(); jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0)); + /* Note: we must update block_records and jumps to reflect + * the fact that the control path has been altered to an + * instruction that clears the execute flag. + */ jumps[lower] = 0; block_records[lower].min_strength = strength_always_clears_execute_flag; block_records[lower].may_clear_execute_flag = true; @@ -411,6 +603,12 @@ lower_continue: /* move out a jump out if possible */ if(pull_out_jumps) { + /* If one of the branches ends in a jump, and control cannot + * fall out the bottom of the other branch, then we can move + * the jump after the if. + * + * Set move_out to the branch we are moving a jump out of. + */ int move_out = -1; if(jumps[0] && block_records[1].min_strength >= strength_continue) move_out = 0; @@ -421,22 +619,46 @@ lower_continue: { jumps[move_out]->remove(); ir->insert_after(jumps[move_out]); + /* Note: we must update block_records and jumps to reflect + * the fact that the jump has been moved out of the if. + */ jumps[move_out] = 0; block_records[move_out].min_strength = strength_none; this->progress = true; } } + /* Now satisfy the ANALYSIS postcondition by setting + * this->block.min_strength and + * this->block.may_clear_execute_flag based on the + * characteristics of the two branches. 
+ */ if(block_records[0].min_strength < block_records[1].min_strength) this->block.min_strength = block_records[0].min_strength; else this->block.min_strength = block_records[1].min_strength; this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag; + /* Now we need to clean up the instructions that follow the + * if. + * + * If those instructions are unreachable, then satisfy the + * DEAD_CODE_ELIMINATION postcondition by eliminating them. + * Otherwise that postcondition is already satisfied. + */ if(this->block.min_strength) truncate_after_instruction(ir); else if(this->block.may_clear_execute_flag) { + /* If the "if" instruction might clear the execute flag, then + * we need to guard any instructions that follow so that they + * are only executed if the execute flag is set. + * + * If one of the branches of the "if" always clears the + * execute flag, and the other branch never clears it, then + * this is easy: just move all the instructions following the + * "if" into the branch that never clears it. + */ int move_into = -1; if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag) move_into = 1; @@ -451,14 +673,34 @@ lower_continue: if(!next->is_tail_sentinel()) { move_outer_block_inside(ir, list); + /* If any instructions moved, then we need to visit + * them (since they are now inside the "if"). Since + * block_records[move_into] is in its default state + * (see assertion above), we can safely replace + * block_records[move_into] with the result of this + * analysis. + */ exec_list list; list.head = next; block_records[move_into] = visit_block(&list); + /* + * Then we need to re-start our jump lowering, since one + * of the instructions we moved might be a jump that + * needs to be lowered. 
+ */ this->progress = true; goto retry; } } else { + /* If we get here, then the simple case didn't apply; we + * need to actually guard the instructions that follow. + * + * To avoid creating unnecessarily-deep nesting, first + * look through the instructions that follow and unwrap + * any instructions that are already wrapped in the + * appropriate guard. + */ ir_instruction* ir_after; for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();) { @@ -479,6 +721,9 @@ lower_continue: this->progress = true; } + /* Then, wrap all the instructions that follow in a single + * guard. + */ if(!ir->get_next()->is_tail_sentinel()) { assert(this->loop.execute_flag); ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag)); @@ -493,29 +738,87 @@ lower_continue: virtual void visit(ir_loop *ir) { + /* Visit the body of the loop, with a fresh data structure in + * this->loop so that the analysis we do here won't bleed into + * enclosing loops. + * + * We assume that all code after a loop is reachable from the + * loop (see comments on enum jump_strength), so the + * DEAD_CODE_ELIMINATION postcondition is automatically + * satisfied, as is the block.min_strength portion of the + * ANALYSIS postcondition. + * + * The block.may_clear_execute_flag portion of the ANALYSIS + * postcondition is automatically satisfied because execute + * flags do not propagate outside of loops. + * + * The loop.may_set_return_flag portion of the ANALYSIS + * postcondition is handled below. + */ ++this->function.nesting_depth; loop_record saved_loop = this->loop; this->loop = loop_record(this->function.signature, ir); + /* Recursively lower nested jumps. This satisfies the + * CONTAINED_JUMPS_LOWERED postcondition, except in the case of + * an unconditional continue or return at the bottom of the + * loop. 
+ */ block_record body = visit_block(&ir->body_instructions); if(body.min_strength >= strength_break) { - /* FINISHME: turn the this->loop into an if, or replace it with its body */ + /* FINISHME: If the min_strength of the loop body is + * strength_break or strength_return, that means that it + * isn't a loop at all, since control flow always leaves the + * body of the loop via break or return. In principle the + * loop could be eliminated in this case. This optimization + * is not implemented yet. + */ } if(this->loop.break_flag) { + /* If a break flag was generated while visiting the body of + * the loop, then at least one break was lowered, so we need + * to generate an if statement at the end of the loop that + * does a "break" if the break flag is set. The break we + * generate won't violate the CONTAINED_JUMPS_LOWERED + * postcondition, because should_lower_jump() always returns + * false for a break that happens at the end of a loop. + */ ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag)); break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); ir->body_instructions.push_tail(break_if); } + /* If the body of the loop may set the return flag, then at + * least one return was lowered to a break, so we need to ensure + * that the return flag is checked after the body of the loop is + * executed. + */ if(this->loop.may_set_return_flag) { assert(this->function.return_flag); + /* Generate the if statement to check the return flag */ ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag)); + /* Note: we also need to propagate the knowledge that the + * return flag may get set to the outer context. This + * satisfies the loop.may_set_return_flag part of the + * ANALYSIS postcondition. 
+ */ saved_loop.may_set_return_flag = true; if(saved_loop.loop) + /* If this loop is nested inside another one, then the if + * statement that we generated should break out of that + * loop if the return flag is set. Caller will lower that + * break statement if necessary. + */ return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); else + /* Otherwise, all we need to do is ensure that the + * instructions that follow are only executed if the + * return flag is clear. We can do that by moving those + * instructions into the else clause of the generated if + * statement. + */ move_outer_block_inside(ir, &return_if->else_instructions); ir->insert_after(return_if); } @@ -536,6 +839,11 @@ lower_continue: this->loop = loop_record(ir); assert(!this->loop.loop); + + /* Visit the body of the function to lower any jumps that occur + * in it, except possibly an unconditional return statement at + * the end of it. + */ visit_block(&ir->body); if(this->function.return_value) From dbaa2e627effbe1361e1a69c23cf247cf86f2709 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 11:59:32 -0700 Subject: [PATCH 067/113] glsl: Refactor logic for determining whether to lower return statements. Previously, do_lower_jumps.cpp determined whether to lower return statements in ir_lower_jumps_visitor::should_lower_jumps(). Moved this logic to ir_lower_jumps_visitor::visit(ir_function_signature *), so that it can be used in determining whether to lower a return statement at the end of a function. 
--- src/glsl/lower_jumps.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index da85c6b49c0..fa247c6c9b9 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -210,16 +210,17 @@ struct function_record ir_function_signature* signature; ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */ ir_variable* return_value; - bool is_main; + bool lower_return; unsigned nesting_depth; - function_record(ir_function_signature* p_signature = 0) + function_record(ir_function_signature* p_signature = 0, + bool lower_return = false) { this->signature = p_signature; this->return_flag = 0; this->return_value = 0; this->nesting_depth = 0; - this->is_main = this->signature && (strcmp(this->signature->function_name(), "main") == 0); + this->lower_return = lower_return; } ir_variable* get_return_flag() @@ -398,10 +399,8 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { /* never lower return at the end of a this->function */ if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel()) lower = false; - else if (this->function.is_main) - lower = lower_main_return; else - lower = lower_sub_return; + lower = this->function.lower_return; break; } return lower; @@ -833,9 +832,15 @@ lower_continue: assert(!this->function.signature); assert(!this->loop.loop); + bool lower_return; + if (strcmp(ir->function_name(), "main") == 0) + lower_return = lower_main_return; + else + lower_return = lower_sub_return; + function_record saved_function = this->function; loop_record saved_loop = this->loop; - this->function = function_record(ir); + this->function = function_record(ir, lower_return); this->loop = loop_record(ir); assert(!this->loop.loop); From afc9a50fba39df520046019c6993d7b7559329ea Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 12:14:07 -0700 Subject: [PATCH 068/113] glsl: Lower 
unconditional return statements. Previously, lower_jumps.cpp only lowered return statements that appeared inside of an if statement. Without this patch, lower_jumps.cpp might not lower certain return statements, causing some back-ends to fail (as in bug #36669). Fixes unit test test_lower_returns_1. --- src/glsl/lower_jumps.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index fa247c6c9b9..eceba09266e 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -851,6 +851,20 @@ lower_continue: */ visit_block(&ir->body); + /* If the body ended in an unconditional return of non-void, + * then we don't need to lower it because it's the one canonical + * return. + * + * If the body ended in a return of void, eliminate it because + * it is redundant. + */ + if (ir->return_type->is_void() && + get_jump_strength((ir_instruction *) ir->body.get_tail())) { + ir_jump *jump = (ir_jump *) ir->body.get_tail(); + assert (jump->ir_type == ir_type_return); + jump->remove(); + } + if(this->function.return_value) ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value))); From 03145ba655ad9173a74b853843eccaae78ff392f Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 18:26:05 -0700 Subject: [PATCH 069/113] glsl: lower unconditional returns and continues in loops. Previously, lower_jumps.cpp would only lower return and continue statements that appeared inside conditionals. This patch makes it lower unconditional returns and continue statements that occur inside a loop. Such unconditional flow control statements would be unlikely to be explicitly coded by a reasonable user, however they might arise as a result of other optimizations. Without this patch, lower_jumps.cpp might not lower certain return and continue statements, causing some backends to fail. 
Fixes unit tests test_lower_return_void_at_end_of_loop and test_remove_continue_at_end_of_loop. --- src/glsl/lower_jumps.cpp | 62 ++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index eceba09266e..cbdd8eaa8be 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -304,6 +304,43 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { } } + /** + * Insert the instructions necessary to lower a return statement, + * before the given return instruction. + */ + void insert_lowered_return(ir_return *ir) + { + ir_variable* return_flag = this->function.get_return_flag(); + if(!this->function.signature->return_type->is_void()) { + ir_variable* return_value = this->function.get_return_value(); + ir->insert_before( + new(ir) ir_assignment( + new (ir) ir_dereference_variable(return_value), + ir->value)); + } + ir->insert_before( + new(ir) ir_assignment( + new (ir) ir_dereference_variable(return_flag), + new (ir) ir_constant(true))); + this->loop.may_set_return_flag = true; + } + + /** + * If the given instruction is a return, lower it to instructions + * that store the return value (if there is one), set the return + * flag, and then break. + * + * It is safe to pass NULL to this function. + */ + void lower_return_unconditionally(ir_instruction *ir) + { + if (get_jump_strength(ir) != strength_return) { + return; + } + insert_lowered_return((ir_return*)ir); + ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); + } + virtual void visit(class ir_loop_jump * ir) { /* Eliminate all instructions after each one, since they are @@ -532,13 +569,7 @@ retry: /* we get here if we put code after the if inside a branch */ * that: 1. store the return value (if this function has a * non-void return) and 2. 
set the return flag */ - ir_variable* return_flag = this->function.get_return_flag(); - if(!this->function.signature->return_type->is_void()) { - ir_variable* return_value = this->function.get_return_value(); - jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_value), ((ir_return*)jumps[lower])->value, NULL)); - } - jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_flag), new (ir) ir_constant(true), NULL)); - this->loop.may_set_return_flag = true; + insert_lowered_return((ir_return*)jumps[lower]); if(this->loop.loop) { /* If we are in a loop, replace the return instruction * with a break instruction, and then loop so that the @@ -761,10 +792,25 @@ lower_continue: /* Recursively lower nested jumps. This satisfies the * CONTAINED_JUMPS_LOWERED postcondition, except in the case of * an unconditional continue or return at the bottom of the - * loop. + * loop, which are handled below. */ block_record body = visit_block(&ir->body_instructions); + /* If the loop ends in an unconditional continue, eliminate it + * because it is redundant. + */ + ir_instruction *ir_last + = (ir_instruction *) ir->body_instructions.get_tail(); + if (get_jump_strength(ir_last) == strength_continue) { + ir_last->remove(); + } + + /* If the loop ends in an unconditional return, and we are + * lowering returns, lower it. + */ + if (this->function.lower_return) + lower_return_unconditionally(ir_last); + if(body.min_strength >= strength_break) { /* FINISHME: If the min_strength of the loop body is * strength_break or strength_return, that means that it From 382cee91a4bbbee45897239e6802ccaa5a5ad9c3 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 12:36:23 -0700 Subject: [PATCH 070/113] glsl: Use foreach_list in lower_jumps.cpp The visitor class in lower_jumps.cpp never removes or replaces the instruction being visited, but it frequently alters or removes the instructions that follow it. 
Therefore, to make sure the altered IR is visited, it needs to iterate through exec_lists using foreach_list rather than visit_exec_list(). Without this patch, lower_jumps.cpp may require multiple passes in order to lower all jumps. This results in sub-optimal output because lower_jumps.cpp produces a brand new set of temporary variables each time it is run, and the redundant temporary variables are not guaranteed to be eliminated by later optimization passes. Also, certain invariants assumed by lower_jumps.cpp may fail to hold, causing assertion failures. Fixes unit tests test_lower_pulled_out_jump, test_lower_unified_returns, test_lower_guarded_conditional_break, test_lower_return_non_void_at_end_of_loop, and test_lower_returns_3. Reviewed-by: Kenneth Graunke --- src/glsl/lower_jumps.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index cbdd8eaa8be..199a0184fee 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -445,9 +445,20 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { block_record visit_block(exec_list* list) { + /* Note: since visiting a node may change that node's next + * pointer, we can't use visit_exec_list(), because + * visit_exec_list() caches the node's next pointer before + * visiting it. So we use foreach_list() instead. + * + * foreach_list() isn't safe if the node being visited gets + * removed, but fortunately this visitor doesn't do that. + */ + block_record saved_block = this->block; this->block = block_record(); - visit_exec_list(list, this); + foreach_list(node, list) { + ((ir_instruction *) node)->accept(this); + } block_record ret = this->block; this->block = saved_block; return ret; From e71b4ab8a64bf978b2036976a41e30996eebb0c8 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 15:33:36 -0700 Subject: [PATCH 071/113] glsl: In lower_jumps.cpp, lower both branches of a conditional. 
Previously, lower_jumps.cpp would break out of its loop after lowering a jump instruction in just the then- or else-branch of a conditional, and it would fail to lower a jump instruction occurring in the other branch. Without this patch, lower_jumps.cpp may require multiple passes in order to lower all jumps. This results in sub-optimal output because lower_jumps.cpp produces a brand new set of temporary variables each time it is run, and the redundant temporary variables are not guaranteed to be eliminated by later optimization passes. Fixes unit test test_lower_returns_4. --- src/glsl/lower_jumps.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index 199a0184fee..07897825b49 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -638,7 +638,10 @@ lower_continue: block_records[lower].min_strength = strength_always_clears_execute_flag; block_records[lower].may_clear_execute_flag = true; this->progress = true; - break; + + /* Let the loop run again, in case the other branch of the + * if needs to be lowered too. + */ } } From 067c9d7bd776260298ceabda026425ed7e4eb161 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 17:29:35 -0700 Subject: [PATCH 072/113] glsl: Lower break instructions when necessary at the end of a loop. Normally lower_jumps.cpp doesn't need to lower a break instruction that occurs at the end of a loop, because all back-ends can produce proper GPU instructions for a break instruction in this "canonical" location. However, if other break instructions within the loop are already being lowered, then a break instruction at the end of the loop needs to be lowered too, since after the optimization is complete a new conditional break will be inserted at the end of the loop. Without this patch, lower_jumps.cpp may require multiple passes in order to lower all jumps. 
This results in sub-optimal output because lower_jumps.cpp produces a brand new set of temporary variables each time it is run, and the redundant temporary variables are not guaranteed to be eliminated by later optimization passes. Fixes unit test test_lower_breaks_6. --- src/glsl/lower_jumps.cpp | 55 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index 07897825b49..61874990a94 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -341,6 +341,50 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); } + /** + * Create the necessary instruction to replace a break instruction. + */ + ir_instruction *create_lowered_break() + { + void *ctx = this->function.signature; + return new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(this->loop.get_break_flag()), + new(ctx) ir_constant(true), + 0); + } + + /** + * If the given instruction is a break, lower it to an instruction + * that sets the break flag, without consulting + * should_lower_jump(). + * + * It is safe to pass NULL to this function. + */ + void lower_break_unconditionally(ir_instruction *ir) + { + if (get_jump_strength(ir) != strength_break) { + return; + } + ir->replace_with(create_lowered_break()); + } + + /** + * If the block ends in a conditional or unconditional break, lower + * it, even though should_lower_jump() says it needn't be lowered. 
+ */ + void lower_final_breaks(exec_list *block) + { + ir_instruction *ir = (ir_instruction *) block->get_tail(); + lower_break_unconditionally(ir); + ir_if *ir_if = ir->as_if(); + if (ir_if) { + lower_break_unconditionally( + (ir_instruction *) ir_if->then_instructions.get_tail()); + lower_break_unconditionally( + (ir_instruction *) ir_if->else_instructions.get_tail()); + } + } + virtual void visit(class ir_loop_jump * ir) { /* Eliminate all instructions after each one, since they are @@ -616,7 +660,7 @@ retry: /* we get here if we put code after the if inside a branch */ * The visit() function for the loop will ensure that the * break flag is checked after executing the loop body. */ - jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(this->loop.get_break_flag()), new (ir) ir_constant(true), 0)); + jumps[lower]->insert_before(create_lowered_break()); goto lower_continue; } else if(jump_strengths[lower] == strength_continue) { lower_continue: @@ -836,6 +880,9 @@ lower_continue: } if(this->loop.break_flag) { + /* We only get here if we are lowering breaks */ + assert (lower_break); + /* If a break flag was generated while visiting the body of * the loop, then at least one break was lowered, so we need * to generate an if statement at the end of the loop that @@ -843,7 +890,13 @@ lower_continue: * generate won't violate the CONTAINED_JUMPS_LOWERED * postcondition, because should_lower_jump() always returns * false for a break that happens at the end of a loop. + * + * However, if the loop already ends in a conditional or + * unconditional break, then we need to lower that break, + * because it won't be at the end of the loop anymore. 
*/ + lower_final_breaks(&ir->body_instructions); + ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag)); break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); ir->body_instructions.push_tail(break_if); From 1e39fc784bc3d0d5ad01d9c147529ac0e10f1262 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 5 May 2011 13:09:16 -0700 Subject: [PATCH 073/113] DRI2/GLX: use new swap event types Use the new swap event type so we get valid SBC values. Reviewed-by: Ian Romanick Reviewed-by: Jeremy Huddleston Signed-off-by: Jesse Barnes --- configure.ac | 4 ++-- src/glx/dri2.c | 4 ++-- src/glx/glxext.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index f19f6478b6a..dde13c9b57d 100644 --- a/configure.ac +++ b/configure.ac @@ -22,8 +22,8 @@ LIBDRM_REQUIRED=2.4.24 LIBDRM_RADEON_REQUIRED=2.4.24 LIBDRM_INTEL_REQUIRED=2.4.24 LIBDRM_NOUVEAU_REQUIRED=0.6 -DRI2PROTO_REQUIRED=2.1 -GLPROTO_REQUIRED=1.4.11 +DRI2PROTO_REQUIRED=2.6 +GLPROTO_REQUIRED=1.4.14 LIBDRM_XORG_REQUIRED=2.4.24 LIBKMS_XORG_REQUIRED=1.0.0 diff --git a/src/glx/dri2.c b/src/glx/dri2.c index adfd3d1f7c8..8654a37688f 100644 --- a/src/glx/dri2.c +++ b/src/glx/dri2.c @@ -97,7 +97,7 @@ DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) case DRI2_BufferSwapComplete: { GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event; - xDRI2BufferSwapComplete *awire = (xDRI2BufferSwapComplete *)wire; + xDRI2BufferSwapComplete2 *awire = (xDRI2BufferSwapComplete2 *)wire; /* Ignore swap events if we're not looking for them */ aevent->type = dri2GetSwapEventType(dpy, awire->drawable); @@ -124,7 +124,7 @@ DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) } aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - aevent->sbc = ((CARD64)awire->sbc_hi << 32) | awire->sbc_lo; + aevent->sbc = awire->sbc; return True; } #endif diff --git 
a/src/glx/glxext.c b/src/glx/glxext.c index 73c332793a0..40a06a8612b 100644 --- a/src/glx/glxext.c +++ b/src/glx/glxext.c @@ -133,12 +133,12 @@ __glXWireToEvent(Display *dpy, XEvent *event, xEvent *wire) case GLX_BufferSwapComplete: { GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event; - xGLXBufferSwapComplete *awire = (xGLXBufferSwapComplete *)wire; + xGLXBufferSwapComplete2 *awire = (xGLXBufferSwapComplete2 *)wire; aevent->event_type = awire->event_type; aevent->drawable = awire->drawable; aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - aevent->sbc = ((CARD64)awire->sbc_hi << 32) | awire->sbc_lo; + aevent->sbc = awire->sbc; return True; } default: From 4df137691ee29bb812347fa2c5f19095243ede22 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 6 May 2011 10:31:24 -0700 Subject: [PATCH 074/113] GLX/DRI2: handle swap event swap count wrapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a new GLX drawable struct to track client related info, and add a wrap counter to it and track it as we receive events. This allows us to support the full 64 bits of the event structure we pass to the client even though the server only gives us a 32 bit count. 
Reviewed-by: Michel Dänzer Reviewed-by: Jeremy Huddleston Signed-off-by: Jesse Barnes --- src/glx/dri2.c | 12 ++++++++- src/glx/glx_pbuffer.c | 11 +++++++++ src/glx/glxclient.h | 16 ++++++++++++ src/glx/glxcmds.c | 57 +++++++++++++++++++++++++++++++++++++++++++ src/glx/glxext.c | 14 ++++++++++- 5 files changed, 108 insertions(+), 2 deletions(-) diff --git a/src/glx/dri2.c b/src/glx/dri2.c index 8654a37688f..229840d6919 100644 --- a/src/glx/dri2.c +++ b/src/glx/dri2.c @@ -88,6 +88,7 @@ static Bool DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); + struct glx_drawable *glxDraw; XextCheckExtension(dpy, info, dri2ExtensionName, False); @@ -98,6 +99,9 @@ DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) { GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event; xDRI2BufferSwapComplete2 *awire = (xDRI2BufferSwapComplete2 *)wire; + __GLXDRIdrawable *pdraw; + + pdraw = dri2GetGlxDrawableFromXDrawableId(dpy, awire->drawable); /* Ignore swap events if we're not looking for them */ aevent->type = dri2GetSwapEventType(dpy, awire->drawable); @@ -124,7 +128,13 @@ DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) } aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - aevent->sbc = awire->sbc; + + glxDraw = GetGLXDrawable(dpy, pdraw->drawable); + if (awire->sbc < glxDraw->lastEventSbc) + glxDraw->eventSbcWrap += 0x100000000; + glxDraw->lastEventSbc = awire->sbc; + aevent->sbc = awire->sbc + glxDraw->eventSbcWrap; + return True; } #endif diff --git a/src/glx/glx_pbuffer.c b/src/glx/glx_pbuffer.c index 0e74e7ccd0e..6738252a31d 100644 --- a/src/glx/glx_pbuffer.c +++ b/src/glx/glx_pbuffer.c @@ -396,6 +396,7 @@ CreateDrawable(Display *dpy, struct glx_config *config, Drawable drawable, const int *attrib_list, CARD8 glxCode) { xGLXCreateWindowReq *req; + struct glx_drawable *glxDraw; CARD32 *data; unsigned int i; CARD8 opcode; @@ -411,6 
+412,10 @@ CreateDrawable(Display *dpy, struct glx_config *config, if (!opcode) return None; + glxDraw = Xmalloc(sizeof(*glxDraw)); + if (!glxDraw) + return None; + LockDisplay(dpy); GetReqExtra(GLXCreateWindow, 8 * i, req); data = (CARD32 *) (req + 1); @@ -429,6 +434,11 @@ CreateDrawable(Display *dpy, struct glx_config *config, UnlockDisplay(dpy); SyncHandle(); + if (InitGLXDrawable(dpy, glxDraw, drawable, xid)) { + free(glxDraw); + return None; + } + if (!CreateDRIDrawable(dpy, config, drawable, xid, attrib_list, i)) { if (glxCode == X_GLXCreatePixmap) glxCode = X_GLXDestroyPixmap; @@ -454,6 +464,7 @@ DestroyDrawable(Display * dpy, GLXDrawable drawable, CARD32 glxCode) protocolDestroyDrawable(dpy, drawable, glxCode); + DestroyGLXDrawable(dpy, drawable); DestroyDRIDrawable(dpy, drawable, GL_FALSE); return; diff --git a/src/glx/glxclient.h b/src/glx/glxclient.h index 06415288165..f9154266101 100644 --- a/src/glx/glxclient.h +++ b/src/glx/glxclient.h @@ -567,6 +567,8 @@ struct glx_display */ struct glx_screen **screens; + __glxHashTable *glXDrawHash; + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) __glxHashTable *drawHash; @@ -579,6 +581,14 @@ struct glx_display #endif }; +struct glx_drawable { + XID xDrawable; + XID drawable; + + uint32_t lastEventSbc; + int64_t eventSbcWrap; +}; + extern int glx_screen_init(struct glx_screen *psc, int screen, struct glx_display * priv); @@ -784,6 +794,12 @@ extern int applegl_create_display(struct glx_display *display); #endif + +extern struct glx_drawable *GetGLXDrawable(Display *dpy, GLXDrawable drawable); +extern int InitGLXDrawable(Display *dpy, struct glx_drawable *glxDraw, + XID xDrawable, GLXDrawable drawable); +extern void DestroyGLXDrawable(Display *dpy, GLXDrawable drawable); + extern struct glx_context dummyContext; extern struct glx_screen * diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index 191b321ce32..fc0a07901a7 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -90,6 +90,51 @@ 
GetGLXDRIDrawable(Display * dpy, GLXDrawable drawable) #endif +_X_HIDDEN struct glx_drawable * +GetGLXDrawable(Display *dpy, GLXDrawable drawable) +{ + struct glx_display *priv = __glXInitialize(dpy); + struct glx_drawable *glxDraw; + + if (priv == NULL) + return NULL; + + if (__glxHashLookup(priv->glXDrawHash, drawable, (void *) &glxDraw) == 0) + return glxDraw; + + return NULL; +} + +_X_HIDDEN int +InitGLXDrawable(Display *dpy, struct glx_drawable *glxDraw, XID xDrawable, + GLXDrawable drawable) +{ + struct glx_display *priv = __glXInitialize(dpy); + + if (!priv) + return -1; + + glxDraw->xDrawable = xDrawable; + glxDraw->drawable = drawable; + glxDraw->lastEventSbc = 0; + glxDraw->eventSbcWrap = 0; + + return __glxHashInsert(priv->glXDrawHash, drawable, glxDraw); +} + +_X_HIDDEN void +DestroyGLXDrawable(Display *dpy, GLXDrawable drawable) +{ + struct glx_display *priv = __glXInitialize(dpy); + struct glx_drawable *glxDraw; + + if (!priv) + return; + + glxDraw = GetGLXDrawable(dpy, drawable); + __glxHashDelete(priv->glXDrawHash, drawable); + free(glxDraw); +} /** * Get the GLX per-screen data structure associated with a GLX context. 
@@ -608,6 +653,7 @@ glXCreateGLXPixmap(Display * dpy, XVisualInfo * vis, Pixmap pixmap) return pixmap; #else xGLXCreateGLXPixmapReq *req; + struct glx_drawable *glxDraw; GLXPixmap xid; CARD8 opcode; @@ -616,6 +662,10 @@ glXCreateGLXPixmap(Display * dpy, XVisualInfo * vis, Pixmap pixmap) return None; } + glxDraw = Xmalloc(sizeof(*glxDraw)); + if (!glxDraw) + return None; + /* Send the glXCreateGLXPixmap request */ LockDisplay(dpy); GetReq(GLXCreateGLXPixmap, req); @@ -628,6 +678,11 @@ glXCreateGLXPixmap(Display * dpy, XVisualInfo * vis, Pixmap pixmap) UnlockDisplay(dpy); SyncHandle(); + if (InitGLXDrawable(dpy, glxDraw, pixmap, req->glxpixmap)) { + free(glxDraw); + return None; + } + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) do { /* FIXME: Maybe delay __DRIdrawable creation until the drawable @@ -700,6 +755,8 @@ glXDestroyGLXPixmap(Display * dpy, GLXPixmap glxpixmap) UnlockDisplay(dpy); SyncHandle(); + DestroyGLXDrawable(dpy, glxpixmap); + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) { struct glx_display *const priv = __glXInitialize(dpy); diff --git a/src/glx/glxext.c b/src/glx/glxext.c index 40a06a8612b..8704c484f96 100644 --- a/src/glx/glxext.c +++ b/src/glx/glxext.c @@ -134,11 +134,19 @@ __glXWireToEvent(Display *dpy, XEvent *event, xEvent *wire) { GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event; xGLXBufferSwapComplete2 *awire = (xGLXBufferSwapComplete2 *)wire; + struct glx_drawable *glxDraw = GetGLXDrawable(dpy, awire->drawable); aevent->event_type = awire->event_type; aevent->drawable = awire->drawable; aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - aevent->sbc = awire->sbc; + + if (!glxDraw) + return False; + + if (awire->sbc < glxDraw->lastEventSbc) + glxDraw->eventSbcWrap += 0x100000000; + glxDraw->lastEventSbc = awire->sbc; + aevent->sbc = awire->sbc + glxDraw->eventSbcWrap; return True; } default: @@ -227,6 +235,8 @@ 
glx_display_free(struct glx_display *priv) if (priv->serverGLXversion) Xfree((char *) priv->serverGLXversion); + __glxHashDestroy(priv->glXDrawHash); + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) __glxHashDestroy(priv->drawHash); @@ -847,6 +857,8 @@ __glXInitialize(Display * dpy) XESetCloseDisplay(dpy, dpyPriv->codes->extension, __glXCloseDisplay); XESetErrorString (dpy, dpyPriv->codes->extension,__glXErrorString); + dpyPriv->glXDrawHash = __glxHashCreate(); + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) glx_direct = (getenv("LIBGL_ALWAYS_INDIRECT") == NULL); glx_accel = (getenv("LIBGL_ALWAYS_SOFTWARE") == NULL); From 86f8b4117f35c788c8a043c2e241eb19eaacae8c Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Wed, 6 Jul 2011 05:29:08 +0400 Subject: [PATCH 075/113] r600g: LIT: swap MUL_LIT operands to fix 0^0 For 0^0 case result of "LOG_CLAMPED ...,0" is -MAX_FLOAT, and then result of "MUL_LIT ...,0,-MAX_FLOAT,..." is -MAX_FLOAT instead of 0 because of special src1 checks for -MAX_FLOAT. So swap src0/1: "MUL_LIT ...,-MAX_FLOAT,0,..." to get expected 0, then result of "EXP_IEEE ...,0" is 1 as expected for LIT. 
Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/r600_shader.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f83d7079b29..f3cbf9807fd 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1388,7 +1388,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) return r; } } else { - /* dst.z = log(src.y) */ + /* tmp.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); r600_bc_src(&alu.src[0], &ctx->src[0], 1); @@ -1404,13 +1404,12 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) chan = alu.dst.chan; sel = alu.dst.sel; - /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ + /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); - alu.src[1].sel = sel; - alu.src[1].chan = chan; - + alu.src[0].sel = sel; + alu.src[0].chan = chan; + r600_bc_src(&alu.src[1], &ctx->src[0], 3); r600_bc_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; From b693787fdf82d065c548e80944aad14e9ba64def Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Wed, 6 Jul 2011 05:29:09 +0400 Subject: [PATCH 076/113] r600g: RSQ: clear NEG for operand Need to clear NEG bit because it applies after ABS, e.g. "RSQ ..., -1" uses -|1| as operand. 
Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/r600_shader.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f3cbf9807fd..5aad3e359bc 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1506,6 +1506,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { r600_bc_src(&alu.src[i], &ctx->src[i], 0); alu.src[i].abs = 1; + alu.src[i].neg = 0; } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; From f87d566f4b0e4df18ab60d64951013629bdd624c Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Fri, 8 Jul 2011 06:19:36 +0400 Subject: [PATCH 077/113] r600g: introduce r600_bc_src_set_abs helper and fix LOG LOG instruction should use absolute values of source operand. Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/r600_shader.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 5aad3e359bc..6dae6926360 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -939,6 +939,12 @@ static void r600_bc_src(struct r600_bc_alu_src *bc_src, bc_src->value = shader_src->value[bc_src->chan]; } +static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src) +{ + bc_src->abs = 1; + bc_src->neg = 0; +} + static void tgsi_dst(struct r600_shader_ctx *ctx, const struct tgsi_full_dst_register *tgsi_dst, unsigned swizzle, @@ -998,9 +1004,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) alu.src[1].neg = 1; break; case TGSI_OPCODE_ABS: - alu.src[0].abs = 1; - if (alu.src[0].neg) - alu.src[0].neg = 0; + r600_bc_src_set_abs(&alu.src[0]); break; default: break; @@ -1505,8 +1509,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { r600_bc_src(&alu.src[i], 
&ctx->src[i], 0); - alu.src[i].abs = 1; - alu.src[i].neg = 0; + r600_bc_src_set_abs(&alu.src[i]); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2489,7 +2492,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) int r; int i; - /* result.x = floor(log2(src)); */ + /* result.x = floor(log2(|src|)); */ if (inst->Dst[0].Register.WriteMask & 1) { if (ctx->bc->chiprev == CHIPREV_CAYMAN) { for (i = 0; i < 3; i++) { @@ -2497,6 +2500,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2514,6 +2518,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2538,7 +2543,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - /* result.y = src.x / (2 ^ floor(log2(src.x))); */ + /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { if (ctx->bc->chiprev == CHIPREV_CAYMAN) { @@ -2547,6 +2552,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2564,6 +2570,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -2663,6 +2670,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 
1; @@ -2677,7 +2685,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - /* result.z = log2(src);*/ + /* result.z = log2(|src|);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { if (ctx->bc->chiprev == CHIPREV_CAYMAN) { for (i = 0; i < 3; i++) { @@ -2685,6 +2693,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; if (i == 2) @@ -2702,6 +2711,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; From 3efb47f0b0557a6b96a7e41b27725cea4736a061 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Fri, 8 Jul 2011 06:19:37 +0400 Subject: [PATCH 078/113] r600g: introduce r600_bc_src_toggle_neg helper and fix SUB & LRP SUB & LRP instructions should toggle NEG bit instead of setting it, otherwise e.g. 
"SUB a,b,-1" is translated as "ADD a,b,-1" Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/r600_shader.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6dae6926360..6bb5ceb5450 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -945,6 +945,11 @@ static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src) bc_src->neg = 0; } +static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) +{ + bc_src->neg = !bc_src->neg; +} + static void tgsi_dst(struct r600_shader_ctx *ctx, const struct tgsi_full_dst_register *tgsi_dst, unsigned swizzle, @@ -1001,7 +1006,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_SUB: - alu.src[1].neg = 1; + r600_bc_src_toggle_neg(&alu.src[1]); break; case TGSI_OPCODE_ABS: r600_bc_src_set_abs(&alu.src[0]); @@ -2195,7 +2200,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; r600_bc_src(&alu.src[1], &ctx->src[0], i); - alu.src[1].neg = 1; + r600_bc_src_toggle_neg(&alu.src[1]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { From 57b57f6d1c3689a3a44222cb169bfd3e3142a68d Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 8 Jul 2011 20:17:50 -0700 Subject: [PATCH 079/113] i965/gen7: Remove gratuitous dirty flags from WM and PS state. Commit b46dc45ceef3deb17ba2b0b4300eeb93e9cf7833 claimed that NEW_POLYGONSTIPPLE is gratuitous, but somehow just changed comments and whitespace instead of actually removing the flag. While we're at it, 3DSTATE_PS doesn't appear to need NEW_LINE or NEW_POLYGON either (those are in 3DSTATE_WM). Also, 3DSTATE_WM doesn't appear to need BRW_NEW_NR_WM_SURFACES or BRW_NEW_CURBE_OFFSETS either (those are in 3DSTATE_PS). NOTE: This is a candidate for the 7.11 branch. 
Reviewed-by: Eric Anholt Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen7_wm_state.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 17f75354f1d..0f5b06cbf48 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -138,11 +138,9 @@ upload_wm_state(struct brw_context *brw) const struct brw_tracked_state gen7_wm_state = { .dirty = { - .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_POLYGONSTIPPLE | + .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_COLOR | _NEW_BUFFERS), - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_NR_WM_SURFACES | + .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), .cache = 0, @@ -240,10 +238,7 @@ upload_ps_state(struct brw_context *brw) const struct brw_tracked_state gen7_ps_state = { .dirty = { - .mesa = (_NEW_LINE | - _NEW_POLYGON | - _NEW_POLYGONSTIPPLE | - _NEW_PROGRAM_CONSTANTS), + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | From d03fdc4cdefdfdc5b59547945704c6037a5061c7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 8 Jul 2011 15:30:48 -0700 Subject: [PATCH 080/113] i965/gen4: Fix GPU hangs since the program streaming change. This was tricky. We were doing a use-before-initialize of grf_reg_count, but the value usually got overwritten anyway -- when we didn't have to do a relocation (typical), or on gen5 when we didn't have relocations at all. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38771 Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_vs_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index d5010a21e80..179ca199b45 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -47,6 +47,7 @@ brw_prepare_vs_unit(struct brw_context *brw) memset(vs, 0, sizeof(*vs)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */ + vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs->thread0.kernel_start_pointer = brw_program_reloc(brw, brw->vs.state_offset + @@ -54,7 +55,6 @@ brw_prepare_vs_unit(struct brw_context *brw) brw->vs.prog_offset + (vs->thread0.grf_reg_count << 1)) >> 6; - vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, * which will have the channel mask for dwords 4-7 enabled in the thread, From 949896b82f19f72333e7f6c132bd55e023f0170f Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:18:59 +0200 Subject: [PATCH 081/113] r600g: Fix the type of the family field in r600_pipe_context. 
Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/r600_pipe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index c58c2f77743..b51fa24dfb2 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -175,7 +175,7 @@ struct r600_pipe_fences { struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; - unsigned family; + enum radeon_family family; void *custom_dsa_flush; struct r600_screen *screen; struct radeon *radeon; From b3b946b0ab88c1d7edeab183d8ad5125ba223392 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:18:59 +0200 Subject: [PATCH 082/113] r600g: Store the chip class in r600_pipe_context. Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/evergreen_state.c | 20 +++------- src/gallium/drivers/r600/r600_asm.c | 4 +- src/gallium/drivers/r600/r600_pipe.c | 42 +++++--------------- src/gallium/drivers/r600/r600_pipe.h | 1 + src/gallium/drivers/r600/r600_state.c | 13 +++--- src/gallium/drivers/r600/r600_state_common.c | 26 ++++++------ 6 files changed, 36 insertions(+), 70 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 887f52e67db..97f10ce77b9 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -657,13 +657,11 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, u32 color_control, target_mask; /* FIXME there is more then 8 framebuffer */ unsigned blend_cntl[8]; - enum radeon_family family; if (blend == NULL) { return NULL; } - family = r600_get_family(rctx->radeon); rstate = &blend->rstate; rstate->id = R600_PIPE_STATE_BLEND; @@ -690,7 +688,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, color_control, 0xFFFFFFFD, NULL); - if (family != 
CHIP_CAYMAN) + if (rctx->chip_class != CAYMAN) r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); else { r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL); @@ -827,9 +825,6 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; unsigned clip_rule; - enum radeon_family family; - - family = r600_get_family(rctx->radeon); if (rs == NULL) { return NULL; @@ -888,7 +883,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, tmp = (unsigned)state->line_width * 8; r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), @@ -1447,14 +1442,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); u32 shader_mask, tl, br, target_mask; - enum radeon_family family; int tl_x, tl_y, br_x, br_y; if (rstate == NULL) return; - family = r600_get_family(rctx->radeon); - evergreen_context_flush_dest_caches(&rctx->ctx); rctx->ctx.num_dest_buffers = state->nr_cbufs; @@ -1491,7 +1483,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (br_y == 0) tl_y = 1; /* cayman hw workaround */ - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { if (br_x == 1 && br_y == 1) br_x = 2; } @@ -1535,7 +1527,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, shader_mask, 0xFFFFFFFF, NULL); - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG, 
0x00000000, 0xFFFFFFFF, NULL); } else { @@ -1722,9 +1714,9 @@ void evergreen_init_config(struct r600_pipe_context *rctx) enum radeon_family family; unsigned tmp; - family = r600_get_family(rctx->radeon); + family = rctx->family; - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { cayman_init_config(rctx); return; } diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 065f955ebcb..cd4984e389a 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2162,7 +2162,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru struct r600_bc_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; - unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; + unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160; unsigned format, num_format, format_comp, endian; u32 *bytecode; int i, r; @@ -2287,7 +2287,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru r600_bo_unmap(rctx->radeon, ve->fetch_shader); r600_bc_clear(&bc); - if (rctx->family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) evergreen_fetch_shader(&rctx->context, ve); else r600_fetch_shader(&rctx->context, ve); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index d512268f63f..a3df4f571a0 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -194,7 +194,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void { struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context); struct r600_screen* rscreen = (struct r600_screen *)screen; - enum chip_class class; if (rctx == NULL) return NULL; @@ -211,6 +210,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->screen = rscreen; rctx->radeon = rscreen->radeon; rctx->family = 
r600_get_family(rctx->radeon); + rctx->chip_class = r600_get_family_class(rctx->radeon); rctx->fences.bo = NULL; rctx->fences.data = NULL; @@ -224,47 +224,29 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_init_surface_functions(rctx); rctx->context.draw_vbo = r600_draw_vbo; - switch (r600_get_family(rctx->radeon)) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: + switch (rctx->chip_class) { + case R600: + case R700: r600_init_state_functions(rctx); if (r600_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); return NULL; } r600_init_config(rctx); + rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_BARTS: - case CHIP_TURKS: - case CHIP_CAICOS: - case CHIP_CAYMAN: + case EVERGREEN: + case CAYMAN: evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); return NULL; } evergreen_init_config(rctx); + rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); break; default: - R600_ERR("unsupported family %d\n", r600_get_family(rctx->radeon)); + R600_ERR("Unsupported chip class %d.\n", rctx->chip_class); r600_destroy_context(&rctx->context); return NULL; } @@ -289,12 +271,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - class = r600_get_family_class(rctx->radeon); - if (class == R600 || class == R700) - rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); - else - rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); - return &rctx->context; } diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h index b51fa24dfb2..6f399ed43b0 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -176,6 +176,7 @@ struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; enum radeon_family family; + enum chip_class chip_class; void *custom_dsa_flush; struct r600_screen *screen; struct radeon *radeon; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3085cd9a87a..8a684e63c01 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1399,7 +1399,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta /* EXPORT_NORM is an optimzation that can be enabled for better * performance in certain cases */ - if (rctx->family < CHIP_RV770) { + if (rctx->chip_class == R600) { /* EXPORT_NORM can be enabled if: * - 11-bit or smaller UNORM/SNORM/SRGB * - BLEND_CLAMP is enabled @@ -1559,7 +1559,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, 0xFFFFFFFF, NULL); - if (rctx->family >= CHIP_RV770) { + if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, 0xFFFFFFFF, NULL); @@ -1653,16 +1653,13 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) void r600_adjust_gprs(struct r600_pipe_context *rctx) { - enum radeon_family family; struct r600_pipe_state rstate; unsigned num_ps_gprs = rctx->default_ps_gprs; unsigned num_vs_gprs = rctx->default_vs_gprs; unsigned tmp; int diff; - family = r600_get_family(rctx->radeon); - - if (family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) return; if (!rctx->ps_shader && !rctx->vs_shader) @@ -1714,7 +1711,7 @@ void r600_init_config(struct r600_pipe_context *rctx) struct r600_pipe_state *rstate = &rctx->config; u32 tmp; - family = r600_get_family(rctx->radeon); + family = 
rctx->family; ps_prio = 0; vs_prio = 1; gs_prio = 2; @@ -1895,7 +1892,7 @@ void r600_init_config(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); - if (family >= CHIP_RV770) { + if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, S_009508_DISABLE_CUBE_ANISO(1) | diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index d9140403e5a..408eaed491b 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -109,7 +109,7 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) rctx->states[rs->rstate.id] = &rs->rstate; r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_polygon_offset_update(rctx); } else { r600_polygon_offset_update(rctx); @@ -212,7 +212,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, /* Zero states. 
*/ for (i = 0; i < count; i++) { if (!buffers[i].buffer) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } else { r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); @@ -220,7 +220,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, } } for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } else { r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); @@ -367,7 +367,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) for (i = 0; i < rshader->ninput; i++) { if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || rshader->input[i].name == TGSI_SEMANTIC_FACE) - if (rctx->family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) continue; else sid=0; @@ -387,7 +387,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) tmp |= S_028644_PT_SPRITE_TEX(1); } - if (rctx->family < CHIP_CEDAR) { + if (rctx->chip_class < EVERGREEN) { if (rshader->input[i].centroid) tmp |= S_028644_SEL_CENTROID(1); @@ -434,14 +434,14 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->vs_const_buffer_resource[index]; if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } else { @@ -462,13 +462,13 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->ps_const_buffer_resource[index]; if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + 
if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } else { @@ -521,14 +521,14 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { @@ -600,7 +600,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_shader_rebuild(ctx, rctx->vs_shader); if ((rctx->ps_shader->shader.clamp_color != rctx->clamp_fragment_color) || - ((rctx->family >= CHIP_CEDAR) && rctx->ps_shader->shader.fs_write_all && + ((rctx->chip_class >= EVERGREEN) && rctx->ps_shader->shader.fs_write_all && (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) r600_shader_rebuild(ctx, rctx->ps_shader); @@ -655,7 +655,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) rdraw.indices_bo_offset = draw.index_buffer_offset; } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_draw(&rctx->ctx, &rdraw); } else { r600_context_draw(&rctx->ctx, &rdraw); From 4f7dfd8ad3185f006e7ae8ed86bafd4d66ebc903 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:18:59 +0200 Subject: [PATCH 083/113] r600g: Get rid of the superfluous family field from r600_shader. 
Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/r600_shader.c | 9 ++++----- src/gallium/drivers/r600/r600_shader.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6bb5ceb5450..91649e0c058 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -99,14 +99,14 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s /* build state */ switch (rshader->processor_type) { case TGSI_PROCESSOR_VERTEX: - if (rshader->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_shader_vs(ctx, shader); } else { r600_pipe_shader_vs(ctx, shader); } break; case TGSI_PROCESSOR_FRAGMENT: - if (rshader->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_shader_ps(ctx, shader); } else { r600_pipe_shader_ps(ctx, shader); @@ -135,7 +135,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s fprintf(stderr, "--------------------------------------------------------------\n"); tgsi_dump(shader->tokens, 0); } - shader->shader.family = r600_get_family(rctx->radeon); r = r600_shader_from_tgsi(rctx, shader); if (r) { R600_ERR("translation from TGSI failed !\n"); @@ -610,7 +609,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi ctx.bc = &shader->bc; ctx.shader = shader; - r = r600_bc_init(ctx.bc, shader->family); + r = r600_bc_init(ctx.bc, rctx->family); if (r) return r; ctx.tokens = tokens; @@ -802,7 +801,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { output[i + j].array_base = shader->output[i].sid; output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - if (shader->fs_write_all && (shader->family >= CHIP_CEDAR)) { + if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { for (j = 1; j < 
shader->nr_cbufs; j++) { memset(&output[i + j], 0, sizeof(struct r600_bc_output)); output[i + j].gpr = shader->output[i].gpr; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 76aebf2b1ea..3ba84bd8907 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -43,7 +43,6 @@ struct r600_shader { unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; - enum radeon_family family; boolean uses_kill; boolean fs_write_all; boolean clamp_color; From 89dc31a28d8e5607989ec11cfd29310c1c97f6ac Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:18:59 +0200 Subject: [PATCH 084/113] r600g: Replace the CHIPREV_* defines with the chip_class enum. Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/eg_asm.c | 2 +- src/gallium/drivers/r600/r600_asm.c | 136 ++++++++++++------------ src/gallium/drivers/r600/r600_asm.h | 2 +- src/gallium/drivers/r600/r600_opcodes.h | 10 +- src/gallium/drivers/r600/r600_shader.c | 42 ++++---- 5 files changed, 93 insertions(+), 99 deletions(-) diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index fb0b0f104bf..c95872b0809 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -69,7 +69,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst); - if (bc->chiprev == CHIPREV_EVERGREEN) /* no EOP on cayman */ + if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); id++; diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index cd4984e389a..471fc65e7a5 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -41,9 
+41,9 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r if(alu->is_op3) return 3; - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: switch (alu->inst) { case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; @@ -93,8 +93,8 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r "Need instruction operand number for 0x%x.\n", alu->inst); } break; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: switch (alu->inst) { case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; @@ -208,13 +208,13 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_RV635: case CHIP_RS780: case CHIP_RS880: - bc->chiprev = CHIPREV_R600; + bc->chip_class = R600; break; case CHIP_RV770: case CHIP_RV730: case CHIP_RV710: case CHIP_RV740: - bc->chiprev = CHIPREV_R700; + bc->chip_class = R700; break; case CHIP_CEDAR: case CHIP_REDWOOD: @@ -227,10 +227,10 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: - bc->chiprev = CHIPREV_EVERGREEN; + bc->chip_class = EVERGREEN; break; case CHIP_CAYMAN: - bc->chiprev = CHIPREV_CAYMAN; + bc->chip_class = CAYMAN; break; default: R600_ERR("unknown family %d\n", bc->family); @@ -301,9 +301,9 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) /* alu instructions that can ony exits once per group */ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || @@ -339,8 +339,8 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || 
alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || @@ -382,16 +382,16 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || @@ -403,13 +403,13 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; @@ -418,15 +418,15 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); @@ -438,16 +438,16 @@ static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { return is_alu_reduction_inst(bc, alu) || is_alu_mova_inst(bc, alu) || - (bc->chiprev == CHIPREV_EVERGREEN && + (bc->chip_class == EVERGREEN && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR); } /* alu instructions that can only execute on the trans unit */ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: if (!alu->is_op3) return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || @@ -478,8 +478,8 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 || alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 || alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: if (!alu->is_op3) /* Note that FLT_TO_INT_* instructions are vector-only instructions @@ -525,7 +525,7 @@ static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, { struct r600_bc_alu *alu; unsigned i, chan, trans; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 
4 : 5; for (i = 0; i < max_slots; i++) assignment[i] = NULL; @@ -612,7 +612,7 @@ static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) { int res, num_res = 4; - if (bc->chiprev >= CHIPREV_R700) { + if (bc->chip_class >= R700) { num_res = 2; chan /= 2; } @@ -733,8 +733,8 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct alu_bank_swizzle bs; int bank_swizzle[5]; int i, r = 0, forced = 0; - boolean scalar_only = bc->chiprev == CHIPREV_CAYMAN ? false : true; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + boolean scalar_only = bc->chip_class == CAYMAN ? false : true; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) { if (slots[i] && slots[i]->bank_swizzle_force) { @@ -806,7 +806,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, struct r600_bc_alu *prev[5]; int gpr[5], chan[5]; int i, j, r, src, num_src; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) @@ -834,7 +834,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) continue; - if (bc->chiprev < CHIPREV_CAYMAN) { + if (bc->chip_class < CAYMAN) { if (alu->src[src].sel == gpr[4] && alu->src[src].chan == chan[4]) { alu->src[src].sel = V_SQ_ALU_SRC_PS; @@ -948,7 +948,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], int i, j, r, src, num_src; int num_once_inst = 0; int have_mova = 0, have_rel = 0; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 
4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) @@ -1252,7 +1252,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int uint32_t literal[4]; unsigned nliteral; struct r600_bc_alu *slots[5]; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); if (r) return r; @@ -1302,26 +1302,26 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) { - switch (bc->chiprev) { - case CHIPREV_R600: + switch (bc->chip_class) { + case R600: return 8; - case CHIPREV_R700: + case R700: return 16; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: return 64; default: - R600_ERR("Unknown chiprev %d.\n", bc->chiprev); + R600_ERR("Unknown chip class %d.\n", bc->chip_class); return 8; } } static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) { - if (bc->chiprev == CHIPREV_CAYMAN) { + if (bc->chip_class == CAYMAN) { if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC) return TRUE; } else { @@ -1350,7 +1350,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) free(nvtx); return r; } - if (bc->chiprev == CHIPREV_CAYMAN) + if (bc->chip_class == CAYMAN) bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC; else bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; @@ -1438,7 +1438,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); id++; bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | @@ -1453,7 +1453,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx 
*vtx, unsign S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); id++; bc->bytecode[id++] = 0; @@ -1560,13 +1560,13 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | - S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chip_class == R600 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - if (bc->chiprev == CHIPREV_R700) + if (bc->chip_class == R700) r700_bc_cf_vtx_build(&bc->bytecode[id], cf); else r600_bc_cf_vtx_build(&bc->bytecode[id], cf); @@ -1673,7 +1673,7 @@ int r600_bc_build(struct r600_bc *bc) return -ENOMEM; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; - if (bc->chiprev >= CHIPREV_EVERGREEN) + if (bc->chip_class >= EVERGREEN) r = eg_bc_cf_build(bc, cf); else r = r600_bc_cf_build(bc, cf); @@ -1691,13 +1691,13 @@ int r600_bc_build(struct r600_bc *bc) if (r) return r; r600_bc_alu_adjust_literals(bc, alu, literal, nliteral); - switch(bc->chiprev) { - case CHIPREV_R600: + switch(bc->chip_class) { + case R600: r = r600_bc_alu_build(bc, alu, addr); break; - case CHIPREV_R700: - case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */ - case CHIPREV_CAYMAN: /* eg alu is same encoding as r700 */ + case R700: + case EVERGREEN: /* eg alu is same encoding as r700 */ + case CAYMAN: /* eg alu is same encoding as r700 */ r = r700_bc_alu_build(bc, alu, addr); break; default: @@ -1726,7 +1726,7 @@ int r600_bc_build(struct r600_bc *bc) } break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: 
- if (bc->chiprev == CHIPREV_CAYMAN) { + if (bc->chip_class == CAYMAN) { LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { r = r600_bc_vtx_build(bc, vtx, addr); if (r) @@ -1812,17 +1812,17 @@ void r600_bc_dump(struct r600_bc *bc) unsigned nliteral; char chip = '6'; - switch (bc->chiprev) { - case 1: + switch (bc->chip_class) { + case R700: chip = '7'; break; - case 2: + case EVERGREEN: chip = 'E'; break; - case 3: + case CAYMAN: chip = 'C'; break; - case 0: + case R600: default: chip = '6'; break; @@ -1993,7 +1993,7 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); else fprintf(stderr, "SEL_Y:%d) ", 0); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 540f45bbd06..423e94b8a1d 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -172,7 +172,7 @@ struct r600_cf_callstack { struct r600_bc { enum radeon_family family; - int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ + enum chip_class chip_class; int type; struct list_head cf; struct r600_bc_cf *cf_last; diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 184f32c9960..7ae091ea5cd 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -409,14 +409,8 @@ #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_EXPORT_COMBINED 0x0000005B #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS 0x0000005C +#define BC_INST(bc, x) ((bc)->chip_class >= EVERGREEN ? EG_##x : x) -#define CHIPREV_R600 0 -#define CHIPREV_R700 1 -#define CHIPREV_EVERGREEN 2 -#define CHIPREV_CAYMAN 3 - -#define BC_INST(bc, x) ((bc)->chiprev >= CHIPREV_EVERGREEN ? 
EG_##x : x) - -#define CTX_INST(x) (ctx->bc->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x) +#define CTX_INST(x) (ctx->bc->chip_class >= EVERGREEN ? EG_##x : x) #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 91649e0c058..6f84d8740ee 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -316,7 +316,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].interpolate = d->Declaration.Interpolate; ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; - if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) { /* turn input into interpolate on EG */ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { if (ctx->shader->input[i].interpolate > 0) { @@ -650,13 +650,13 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; - if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx.bc->chip_class >= EVERGREEN) { r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } else { r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } } - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + @@ -710,9 +710,9 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi goto out_err; if ((r = tgsi_split_literal_constant(&ctx))) goto out_err; - if (ctx.bc->chiprev == CHIPREV_CAYMAN) + if (ctx.bc->chip_class == CAYMAN) ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; - else if 
(ctx.bc->chiprev >= CHIPREV_EVERGREEN) + else if (ctx.bc->chip_class >= EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; @@ -885,7 +885,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } /* set export done on last export of each type */ for (i = noutput - 1, output_done = 0; i >= 0; i--) { - if (ctx.bc->chiprev < CHIPREV_CAYMAN) { + if (ctx.bc->chip_class < CAYMAN) { if (i == (noutput - 1)) { output[i].end_of_program = 1; } @@ -902,7 +902,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi goto out_err; } /* add program end */ - if (ctx.bc->chiprev == CHIPREV_CAYMAN) + if (ctx.bc->chip_class == CAYMAN) cm_bc_add_cf_end(ctx.bc); free(ctx.literals); @@ -1122,7 +1122,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - if (ctx->bc->chiprev == CHIPREV_R600) { + if (ctx->bc->chip_class == R600) { alu.src[1].value = *(uint32_t *)&double_pi; alu.src[2].value = *(uint32_t *)&neg_pi; } else { @@ -1229,7 +1229,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.x = COS */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); @@ -1263,7 +1263,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.y = SIN */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); @@ -1378,7 +1378,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) int sel; int i; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) 
{ for (i = 0; i < 3; i++) { /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1428,7 +1428,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1905,7 +1905,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { int out_chan; /* Add perspective divide */ - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { out_chan = 2; for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1987,7 +1987,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } /* tmp1.z = RCP_e(|tmp1.z|) */ - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2380,7 +2380,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; @@ -2436,7 +2436,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.z = RoughApprox2ToX(tmp);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2498,7 +2498,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.x = floor(log2(|src|)); */ if (inst->Dst[0].Register.WriteMask & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); 
@@ -2550,7 +2550,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2601,7 +2601,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2635,7 +2635,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2691,7 +2691,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.z = log2(|src|);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); From 2b5b289a570c699403f115cf3ad094ce92eba2fb Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:18:59 +0200 Subject: [PATCH 085/113] r600g: Store the chip class directly in r600_bc. Instead of deriving it from the family again. 
Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/r600_asm.c | 48 +++----------------------- src/gallium/drivers/r600/r600_asm.h | 3 +- src/gallium/drivers/r600/r600_shader.c | 4 +-- 3 files changed, 6 insertions(+), 49 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 471fc65e7a5..5fae2b00c8b 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -195,48 +195,10 @@ static struct r600_bc_tex *r600_bc_tex(void) return tex; } -int r600_bc_init(struct r600_bc *bc, enum radeon_family family) +void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class) { LIST_INITHEAD(&bc->cf); - bc->family = family; - switch (bc->family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - bc->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - bc->chip_class = R700; - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_BARTS: - case CHIP_TURKS: - case CHIP_CAICOS: - bc->chip_class = EVERGREEN; - break; - case CHIP_CAYMAN: - bc->chip_class = CAYMAN; - break; - default: - R600_ERR("unknown family %d\n", bc->family); - return -EINVAL; - } - return 0; + bc->chip_class = chip_class; } static int r600_bc_add_cf(struct r600_bc *bc) @@ -1701,7 +1663,7 @@ int r600_bc_build(struct r600_bc *bc) r = r700_bc_alu_build(bc, alu, addr); break; default: - R600_ERR("unknown family %d\n", bc->family); + R600_ERR("unknown chip class %d.\n", bc->chip_class); return -EINVAL; } if (r) @@ -2180,9 +2142,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } memset(&bc, 0, sizeof(bc)); - r = r600_bc_init(&bc, r600_get_family(rctx->radeon)); - if (r) - return r; + r600_bc_init(&bc, 
rctx->chip_class); for (i = 0; i < ve->count; i++) { if (elements[i].instance_divisor > 1) { diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 423e94b8a1d..cbdaacf7178 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -171,7 +171,6 @@ struct r600_cf_callstack { }; struct r600_bc { - enum radeon_family family; enum chip_class chip_class; int type; struct list_head cf; @@ -193,7 +192,7 @@ struct r600_bc { int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); /* r600_asm.c */ -int r600_bc_init(struct r600_bc *bc, enum radeon_family family); +void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class); void r600_bc_clear(struct r600_bc *bc); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6f84d8740ee..de49d212a58 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -609,9 +609,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi ctx.bc = &shader->bc; ctx.shader = shader; - r = r600_bc_init(ctx.bc, rctx->family); - if (r) - return r; + r600_bc_init(ctx.bc, rctx->chip_class); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); From 2e53725bbc6385c76b26f069df0865c22fd365c0 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:19:00 +0200 Subject: [PATCH 086/113] r600g: Check for Evergreen chip class instead of Cedar family in r600_context_flush(). 
Signed-off-by: Henri Verbeet --- src/gallium/winsys/r600/drm/r600_hw_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 633cd35f7a7..a2f13ff0863 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1504,7 +1504,7 @@ void r600_context_flush(struct r600_context *ctx) /* suspend queries */ r600_context_queries_suspend(ctx); - if (ctx->radeon->family >= CHIP_CEDAR) + if (ctx->radeon->chip_class >= EVERGREEN) evergreen_context_flush_dest_caches(ctx); else r600_context_flush_dest_caches(ctx); From 7e591111bf783d94ee6034287cde2f4c9214e810 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:19:00 +0200 Subject: [PATCH 087/113] r600g: Get rid of some superfluous braces. Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/evergreen_state.c | 14 +++++++------- src/gallium/drivers/r600/r600_state.c | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 97f10ce77b9..fbf25feaf20 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -510,11 +510,11 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) if (R600_BIG_ENDIAN) { switch(colorformat) { case V_028C70_COLOR_4_4: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 8-bit buffers. */ case V_028C70_COLOR_8: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 16-bit buffers. */ case V_028C70_COLOR_5_6_5: @@ -522,7 +522,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_4_4_4_4: case V_028C70_COLOR_16: case V_028C70_COLOR_8_8: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; /* 32-bit buffers. 
*/ case V_028C70_COLOR_8_8_8_8: @@ -532,23 +532,23 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_FLOAT: case V_028C70_COLOR_16_16_FLOAT: case V_028C70_COLOR_16_16: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 64-bit buffers. */ case V_028C70_COLOR_16_16_16_16: case V_028C70_COLOR_16_16_16_16_FLOAT: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; case V_028C70_COLOR_32_32_FLOAT: case V_028C70_COLOR_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 96-bit buffers. */ case V_028C70_COLOR_32_32_32_FLOAT: /* 128-bit buffers. */ case V_028C70_COLOR_32_32_32_32_FLOAT: case V_028C70_COLOR_32_32_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; default: return ENDIAN_NONE; /* Unsupported. */ } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 8a684e63c01..203b39f855f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -501,11 +501,11 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) if (R600_BIG_ENDIAN) { switch(colorformat) { case V_0280A0_COLOR_4_4: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 8-bit buffers. */ case V_0280A0_COLOR_8: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 16-bit buffers. */ case V_0280A0_COLOR_5_6_5: @@ -513,7 +513,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_4_4_4_4: case V_0280A0_COLOR_16: case V_0280A0_COLOR_8_8: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; /* 32-bit buffers. */ case V_0280A0_COLOR_8_8_8_8: @@ -523,22 +523,22 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_32_FLOAT: case V_0280A0_COLOR_16_16_FLOAT: case V_0280A0_COLOR_16_16: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 64-bit buffers. 
*/ case V_0280A0_COLOR_16_16_16_16: case V_0280A0_COLOR_16_16_16_16_FLOAT: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; case V_0280A0_COLOR_32_32_FLOAT: case V_0280A0_COLOR_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 128-bit buffers. */ case V_0280A0_COLOR_32_32_32_FLOAT: case V_0280A0_COLOR_32_32_32_32_FLOAT: case V_0280A0_COLOR_32_32_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; default: return ENDIAN_NONE; /* Unsupported. */ } From 440224ab73ba19a96629c34e21fe976d1395e483 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 9 Jul 2011 02:46:03 -0700 Subject: [PATCH 088/113] intel: Recognize all depth formats in get_teximage_readbuffer. The existing code was missing GL_DEPTH_COMPONENT32, resulting in it wrongly returning the color buffer instead of the depth buffer. Fixes an issue in PlaneShift 0.5.7 when casting spells. The game calls CopyTexSubImage2D on buffers with a GL_DEPTH_COMPONENT32 internal format, which (prior to this patch) resulted in an attempt to copy ARGB8888 to X8_Z24. Instead of adding the missing enumeration directly, convert the code to use _mesa_is_depth_format() and _mesa_is_depthstencil_format() as these should catch any newly added depth formats in the future. NOTE: This is a candidate for the 7.10 and 7.11 branches. 
Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/intel/intel_tex_copy.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index eda07a43dee..1a3643da593 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -55,15 +55,11 @@ get_teximage_readbuffer(struct intel_context *intel, GLenum internalFormat) DBG("%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(internalFormat)); - switch (internalFormat) { - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH24_STENCIL8_EXT: - case GL_DEPTH_STENCIL_EXT: + if (_mesa_is_depth_format(internalFormat) || + _mesa_is_depthstencil_format(internalFormat)) return intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - default: - return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); - } + + return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); } From 95f9e118fe7b02ab5d28550dabd8751e5fb15e3c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sun, 10 Jul 2011 01:27:20 +0800 Subject: [PATCH 089/113] st/egl: fix linking errors Add symbols referenced by src/glx/dri2.c. --- .../state_trackers/egl/x11/x11_screen.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.c b/src/gallium/state_trackers/egl/x11/x11_screen.c index f1cc4400ba5..6155b4d03c0 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.c +++ b/src/gallium/state_trackers/egl/x11/x11_screen.c @@ -452,6 +452,12 @@ dri2InvalidateBuffers(Display *dpy, XID drawable) extern unsigned dri2GetSwapEventType(Display *dpy, XID drawable); +extern void * +dri2GetGlxDrawableFromXDrawableId(Display *dpy, XID id); + +extern void * +GetGLXDrawable(Display *dpy, XID drawable); + /** * This is also called from src/glx/dri2.c. 
*/ @@ -460,4 +466,16 @@ unsigned dri2GetSwapEventType(Display *dpy, XID drawable) return 0; } +void * +dri2GetGlxDrawableFromXDrawableId(Display *dpy, XID id) +{ + return NULL; +} + +void * +GetGLXDrawable(Display *dpy, XID drawable) +{ + return NULL; +} + #endif /* GLX_DIRECT_RENDERING */ From d644a50dc328e54d513e4304378bb8c34148f7cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 Jul 2011 17:40:38 +0200 Subject: [PATCH 090/113] st/dri: remove unused variables --- src/gallium/state_trackers/dri/common/dri_context.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index 08bbdf96e34..e6612b1911d 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -143,8 +143,6 @@ dri_unbind_context(__DRIcontext * cPriv) /* dri_util.c ensures cPriv is not null */ struct dri_screen *screen = dri_screen(cPriv->driScreenPriv); struct dri_context *ctx = dri_context(cPriv); - struct dri_drawable *draw = dri_drawable(ctx->dPriv); - struct dri_drawable *read = dri_drawable(ctx->rPriv); struct st_api *stapi = screen->st_api; if (--ctx->bind_count == 0) { From f0a7e28e29b5005c20ac02a7eec6511f6d7fd1c4 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Sun, 10 Jul 2011 13:19:38 -0400 Subject: [PATCH 091/113] r600g: LIT: clamp negative src.y to 0 Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39083 Signed-off-by: Vadim Girlin Signed-off-by: Alex Deucher --- src/gallium/drivers/r600/r600_shader.c | 29 +++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index de49d212a58..3e21ad1fdc6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1370,6 +1370,22 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) struct r600_bc_alu 
alu; int r; + /* tmp.x = max(src.y, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ + alu.src[1].chan = 1; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + if (inst->Dst[0].Register.WriteMask & (1 << 2)) { int chan; @@ -1378,11 +1394,13 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - /* dst.z = log(src.y) */ + /* tmp.z = log(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); - tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; if (i == 2) { alu.dst.write = 1; alu.last = 1; @@ -1394,10 +1412,11 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) return r; } } else { - /* tmp.z = log(src.y) */ + /* tmp.z = log(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 2; alu.dst.write = 1; From 1165280cbd37dee1e499358633478ab869de21df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 1 Jun 2011 15:48:51 +0200 Subject: [PATCH 092/113] mesa: initial ARB_depth_buffer_float support Using GL_NONE as DataType of Z32_FLOAT_X24S8, not sure what I should put there. The spec says the type is n/a. 
Reviewed-by: Kenneth Graunke --- src/mesa/main/fbobject.c | 19 +++++++++++++++++++ src/mesa/main/formats.c | 29 +++++++++++++++++++++++++++++ src/mesa/main/formats.h | 3 +++ src/mesa/main/image.c | 18 ++++++++++++++++-- src/mesa/main/readpix.c | 29 +++++++++++++++++++++++++---- src/mesa/main/renderbuffer.c | 3 +++ src/mesa/main/texfetch.c | 14 ++++++++++++++ src/mesa/main/texformat.c | 13 +++++++++++++ src/mesa/main/texstore.c | 3 +++ 9 files changed, 125 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8cc3fd49a34..d094dd35a69 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1131,6 +1131,16 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat) return GL_DEPTH_STENCIL_EXT; else return 0; + case GL_DEPTH_COMPONENT32F: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_COMPONENT; + else + return 0; + case GL_DEPTH32F_STENCIL8: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_STENCIL; + else + return 0; case GL_RED: case GL_R8: case GL_R16: @@ -2266,6 +2276,15 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, /* special cases */ *params = GL_INDEX; } + else if (format == MESA_FORMAT_Z32_FLOAT_X24S8) { + /* depends on the attachment parameter */ + if (attachment == GL_STENCIL_ATTACHMENT) { + *params = GL_INDEX; + } + else { + *params = GL_FLOAT; + } + } else { *params = _mesa_get_format_datatype(format); } diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index e88ba43971b..f58b1975672 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -1091,6 +1091,25 @@ static struct gl_format_info format_info[MESA_FORMAT_COUNT] = 0, 0, 0, 0, 0, 1, 1, 4 }, + /* ARB_depth_buffer_float */ + { + MESA_FORMAT_Z32_FLOAT, /* Name */ + "MESA_FORMAT_Z32_FLOAT", /* StrName */ + GL_DEPTH_COMPONENT, /* BaseFormat */ + GL_FLOAT, /* DataType */ + 0, 0, 0, 0, /* Red/Green/Blue/AlphaBits */ + 0, 0, 0, 32, 0, /* 
Lum/Int/Index/Depth/StencilBits */ + 1, 1, 4 /* BlockWidth/Height,Bytes */ + }, + { + MESA_FORMAT_Z32_FLOAT_X24S8, /* Name */ + "MESA_FORMAT_Z32_FLOAT_X24S8", /* StrName */ + GL_DEPTH_STENCIL, /* BaseFormat */ + GL_NONE /* XXX */, /* DataType */ + 0, 0, 0, 0, /* Red/Green/Blue/AlphaBits */ + 0, 0, 0, 32, 8, /* Lum/Int/Index/Depth/StencilBits */ + 1, 1, 8 /* BlockWidth/Height,Bytes */ + }, }; @@ -1654,6 +1673,16 @@ _mesa_format_to_type_and_comps(gl_format format, *comps = 1; return; + case MESA_FORMAT_Z32_FLOAT: + *datatype = GL_FLOAT; + *comps = 1; + return; + + case MESA_FORMAT_Z32_FLOAT_X24S8: + *datatype = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + *comps = 1; + return; + case MESA_FORMAT_DUDV8: *datatype = GL_BYTE; *comps = 2; diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index 0640bbc4af1..5b8c01781a6 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -209,6 +209,9 @@ typedef enum MESA_FORMAT_RGB9_E5_FLOAT, MESA_FORMAT_R11_G11_B10_FLOAT, + MESA_FORMAT_Z32_FLOAT, + MESA_FORMAT_Z32_FLOAT_X24S8, + MESA_FORMAT_COUNT } gl_format; diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index 6d7bc735887..37127dcb7a2 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -84,6 +84,7 @@ _mesa_type_is_packed(GLenum type) case GL_UNSIGNED_INT_24_8_EXT: case GL_UNSIGNED_INT_5_9_9_9_REV: case GL_UNSIGNED_INT_10F_11F_11F_REV: + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: return GL_TRUE; } @@ -228,6 +229,8 @@ _mesa_sizeof_packed_type( GLenum type ) return sizeof(GLuint); case GL_UNSIGNED_INT_10F_11F_11F_REV: return sizeof(GLuint); + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + return 8; default: return -1; } @@ -379,6 +382,11 @@ _mesa_bytes_per_pixel( GLenum format, GLenum type ) return sizeof(GLuint); else return -1; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + if (format == GL_DEPTH_STENCIL) + return 8; + else + return -1; default: return -1; } @@ -531,8 +539,10 @@ _mesa_is_legal_format_and_type(const struct gl_context *ctx, else return 
GL_FALSE; case GL_DEPTH_STENCIL_EXT: - if (ctx->Extensions.EXT_packed_depth_stencil - && type == GL_UNSIGNED_INT_24_8_EXT) + if ((ctx->Extensions.EXT_packed_depth_stencil && + type == GL_UNSIGNED_INT_24_8_EXT) || + (ctx->Extensions.ARB_depth_buffer_float && + type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)) return GL_TRUE; else return GL_FALSE; @@ -884,6 +894,7 @@ _mesa_is_depth_format(GLenum format) case GL_DEPTH_COMPONENT16: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: + case GL_DEPTH_COMPONENT32F: return GL_TRUE; default: return GL_FALSE; @@ -931,6 +942,7 @@ _mesa_is_depthstencil_format(GLenum format) switch (format) { case GL_DEPTH24_STENCIL8_EXT: case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH32F_STENCIL8: return GL_TRUE; default: return GL_FALSE; @@ -956,6 +968,8 @@ _mesa_is_depth_or_stencil_format(GLenum format) case GL_STENCIL_INDEX16_EXT: case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: + case GL_DEPTH_COMPONENT32F: + case GL_DEPTH32F_STENCIL8: return GL_TRUE; default: return GL_FALSE; diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 0331a8ca2fe..84c5b22286a 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -61,6 +61,14 @@ _mesa_error_check_format_type(struct gl_context *ctx, GLenum format, return GL_TRUE; } + if (ctx->Extensions.ARB_depth_buffer_float + && type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV + && format != GL_DEPTH_STENCIL_EXT) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "gl%sPixels(format is not GL_DEPTH_STENCIL_EXT)", readDraw); + return GL_TRUE; + } + /* basic combinations test */ if (!_mesa_is_legal_format_and_type(ctx, format, type)) { _mesa_error(ctx, GL_INVALID_ENUM, @@ -142,10 +150,23 @@ _mesa_error_check_format_type(struct gl_context *ctx, GLenum format, } break; case GL_DEPTH_STENCIL_EXT: - if (!ctx->Extensions.EXT_packed_depth_stencil || - type != GL_UNSIGNED_INT_24_8_EXT) { - _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); - return GL_TRUE; + /* Check validity of the type 
first. */ + switch (type) { + case GL_UNSIGNED_INT_24_8_EXT: + if (!ctx->Extensions.EXT_packed_depth_stencil) { + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; + } + break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + if (!ctx->Extensions.ARB_depth_buffer_float) { + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; + } + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; } if ((drawing && !_mesa_dest_buffer_exists(ctx, format)) || (reading && !_mesa_source_buffer_exists(ctx, format))) { diff --git a/src/mesa/main/renderbuffer.c b/src/mesa/main/renderbuffer.c index c36175c60e7..f5b20020d23 100644 --- a/src/mesa/main/renderbuffer.c +++ b/src/mesa/main/renderbuffer.c @@ -66,6 +66,9 @@ get_datatype_bytes(struct gl_renderbuffer *rb) int component_size; switch (rb->DataType) { + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + component_size = 8; + break; case GL_FLOAT: case GL_UNSIGNED_INT: case GL_UNSIGNED_INT_24_8_EXT: diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c index 6716ce1b071..4b85bc32a92 100644 --- a/src/mesa/main/texfetch.c +++ b/src/mesa/main/texfetch.c @@ -913,6 +913,20 @@ texfetch_funcs[MESA_FORMAT_COUNT] = fetch_texel_2d_r11_g11_b10f, fetch_texel_3d_r11_g11_b10f, store_texel_r11_g11_b10f + }, + { + MESA_FORMAT_Z32_FLOAT, + NULL, /* XXX */ + NULL, + NULL, + NULL + }, + { + MESA_FORMAT_Z32_FLOAT_X24S8, + NULL, /* XXX */ + NULL, + NULL, + NULL } }; diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index 8cbb021d8b0..c919a74e047 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -416,6 +416,19 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint internalFormat, } } + if (ctx->Extensions.ARB_depth_buffer_float) { + switch (internalFormat) { + case GL_DEPTH_COMPONENT32F: + ASSERT(ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT]); + return MESA_FORMAT_Z32_FLOAT; + case 
GL_DEPTH32F_STENCIL8: + ASSERT(ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT_X24S8]); + return MESA_FORMAT_Z32_FLOAT_X24S8; + default: + ; /* fallthrough */ + } + } + if (ctx->Extensions.ATI_envmap_bumpmap) { switch (internalFormat) { case GL_DUDV_ATI: diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index e527981ff47..3249e1444e6 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -4419,6 +4419,9 @@ texstore_funcs[MESA_FORMAT_COUNT] = { MESA_FORMAT_RGB9_E5_FLOAT, _mesa_texstore_rgb9_e5 }, { MESA_FORMAT_R11_G11_B10_FLOAT, _mesa_texstore_r11_g11_b10f }, + + { MESA_FORMAT_Z32_FLOAT, NULL /* XXX */ }, + { MESA_FORMAT_Z32_FLOAT_X24S8, /* XXX */ }, }; From 4843c7b24af7408329d33ab16bb946b17244a5f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 20 Jun 2011 03:07:16 +0200 Subject: [PATCH 093/113] mesa: implement texfetch functions for depth_buffer_float Reviewed-by: Kenneth Graunke --- src/mesa/main/texfetch.c | 16 ++++++++-------- src/mesa/main/texfetch_tmp.h | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c index 4b85bc32a92..72283eb68af 100644 --- a/src/mesa/main/texfetch.c +++ b/src/mesa/main/texfetch.c @@ -916,17 +916,17 @@ texfetch_funcs[MESA_FORMAT_COUNT] = }, { MESA_FORMAT_Z32_FLOAT, - NULL, /* XXX */ - NULL, - NULL, - NULL + fetch_texel_1d_f_r_f32, /* Reuse the R32F functions. 
*/ + fetch_texel_2d_f_r_f32, + fetch_texel_3d_f_r_f32, + store_texel_r_f32 }, { MESA_FORMAT_Z32_FLOAT_X24S8, - NULL, /* XXX */ - NULL, - NULL, - NULL + fetch_texel_1d_z32f_x24s8, + fetch_texel_2d_z32f_x24s8, + fetch_texel_3d_z32f_x24s8, + store_texel_z32f_x24s8 } }; diff --git a/src/mesa/main/texfetch_tmp.h b/src/mesa/main/texfetch_tmp.h index e6fd81d4d57..3b1eedf39bf 100644 --- a/src/mesa/main/texfetch_tmp.h +++ b/src/mesa/main/texfetch_tmp.h @@ -2374,6 +2374,29 @@ static void store_texel_r11_g11_b10f(struct gl_texture_image *texImage, #endif +/* MESA_FORMAT_Z32_FLOAT_X24S8 ***********************************************/ + +static void FETCH(z32f_x24s8)(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel) +{ + const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2); + texel[RCOMP] = src[0]; + texel[GCOMP] = 0.0F; + texel[BCOMP] = 0.0F; + texel[ACOMP] = 1.0F; +} + +#if DIM == 3 +static void store_texel_z32f_x24s8(struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, const void *texel) +{ + const GLfloat *src = (const GLfloat *) texel; + GLfloat *dst = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2); + dst[0] = src[0]; +} +#endif + + #undef TEXEL_ADDR #undef DIM #undef FETCH From ec6fbbe36ee198d00db6a1ae297970531186ae3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 05:06:36 +0200 Subject: [PATCH 094/113] mesa: implement stencil unpacking for GL_FLOAT_32_UNSIGNED_INT_24_8_REV Reviewed-by: Kenneth Graunke --- src/mesa/main/pack.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index a232a51c355..c284c7d8d62 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -1971,7 +1971,8 @@ extract_uint_indexes(GLuint n, GLuint indexes[], srcType == GL_INT || srcType == GL_UNSIGNED_INT_24_8_EXT || srcType == GL_HALF_FLOAT_ARB || - srcType == GL_FLOAT); + srcType == GL_FLOAT || + 
srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); switch (srcType) { case GL_BITMAP: @@ -2142,6 +2143,23 @@ extract_uint_indexes(GLuint n, GLuint indexes[], } } break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint i; + const GLuint *s = (const GLuint *) src; + if (unpack->SwapBytes) { + for (i = 0; i < n; i++) { + GLuint value = s[i*2+1]; + SWAP4BYTE(value); + indexes[i] = value & 0xff; /* lower 8 bits */ + } + } + else { + for (i = 0; i < n; i++) + indexes[i] = s[i*2+1] & 0xff; /* lower 8 bits */ + } + } + break; default: _mesa_problem(NULL, "bad srcType in extract_uint_indexes"); @@ -4412,11 +4430,13 @@ _mesa_unpack_stencil_span( struct gl_context *ctx, GLuint n, srcType == GL_INT || srcType == GL_UNSIGNED_INT_24_8_EXT || srcType == GL_HALF_FLOAT_ARB || - srcType == GL_FLOAT); + srcType == GL_FLOAT || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); ASSERT(dstType == GL_UNSIGNED_BYTE || dstType == GL_UNSIGNED_SHORT || - dstType == GL_UNSIGNED_INT); + dstType == GL_UNSIGNED_INT || + dstType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); /* only shift and offset apply to stencil */ transferOps &= IMAGE_SHIFT_OFFSET_BIT; @@ -4488,6 +4508,15 @@ _mesa_unpack_stencil_span( struct gl_context *ctx, GLuint n, case GL_UNSIGNED_INT: memcpy(dest, indexes, n * sizeof(GLuint)); break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint *dst = (GLuint *) dest; + GLuint i; + for (i = 0; i < n; i++) { + dst[i*2+1] = indexes[i] & 0xff; /* lower 8 bits */ + } + } + break; default: _mesa_problem(ctx, "bad dstType in _mesa_unpack_stencil_span"); } From bfb63b7d62ccd9618a110f9f5297f87574384058 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 05:09:24 +0200 Subject: [PATCH 095/113] mesa: implement depth unpacking for GL_FLOAT_32_UNSIGNED_INT_24_8_REV Reviewed-by: Kenneth Graunke --- src/mesa/main/pack.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c 
index c284c7d8d62..d42ae7bf0f4 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -4827,6 +4827,20 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, } } break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint i; + const GLfloat *src = (const GLfloat *)source; + for (i = 0; i < n; i++) { + GLfloat value = src[i * 2]; + if (srcPacking->SwapBytes) { + SWAP4BYTE(value); + } + depthValues[i] = value; + } + needClamp = GL_TRUE; + } + break; case GL_FLOAT: DEPTH_VALUES(GLfloat, 1*); needClamp = GL_TRUE; @@ -4903,9 +4917,18 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, zValues[i] = (GLushort) (depthValues[i] * (GLfloat) depthMax); } } + else if (dstType == GL_FLOAT) { + /* Nothing to do. depthValues is pointing to dest. */ + } + else if (dstType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) { + GLfloat *zValues = (GLfloat*) dest; + GLuint i; + for (i = 0; i < n; i++) { + zValues[i*2] = depthValues[i]; + } + } else { - ASSERT(dstType == GL_FLOAT); - /*ASSERT(depthMax == 1.0F);*/ + ASSERT(0); } free(depthTemp); From bc878c7f8bfdabb40a4f784984c13b94656569ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 13:36:27 +0200 Subject: [PATCH 096/113] mesa: implement texstore for DEPTH_COMPONENT32F Reviewed-by: Kenneth Graunke --- src/mesa/main/texstore.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 3249e1444e6..777773cc14b 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -1002,15 +1002,17 @@ memcpy_texture(struct gl_context *ctx, /** - * Store a 32-bit integer depth component texture image. + * Store a 32-bit integer or float depth component texture image. 
*/ static GLboolean _mesa_texstore_z32(TEXSTORE_PARAMS) { const GLuint depthScale = 0xffffffff; const GLuint texelBytes = _mesa_get_format_bytes(dstFormat); + const GLenum dstType = _mesa_get_format_datatype(dstFormat); (void) dims; - ASSERT(dstFormat == MESA_FORMAT_Z32); + ASSERT(dstFormat == MESA_FORMAT_Z32 || + dstFormat == MESA_FORMAT_Z32_FLOAT); ASSERT(texelBytes == sizeof(GLuint)); if (ctx->Pixel.DepthScale == 1.0f && @@ -1018,7 +1020,7 @@ _mesa_texstore_z32(TEXSTORE_PARAMS) !srcPacking->SwapBytes && baseInternalFormat == GL_DEPTH_COMPONENT && srcFormat == GL_DEPTH_COMPONENT && - srcType == GL_UNSIGNED_INT) { + srcType == dstType) { /* simple memcpy path */ memcpy_texture(ctx, dims, dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset, @@ -1039,7 +1041,7 @@ _mesa_texstore_z32(TEXSTORE_PARAMS) const GLvoid *src = _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, img, row, 0); _mesa_unpack_depth_span(ctx, srcWidth, - GL_UNSIGNED_INT, (GLuint *) dstRow, + dstType, dstRow, depthScale, srcType, src, srcPacking); dstRow += dstRowStride; } @@ -4420,7 +4422,7 @@ texstore_funcs[MESA_FORMAT_COUNT] = { MESA_FORMAT_RGB9_E5_FLOAT, _mesa_texstore_rgb9_e5 }, { MESA_FORMAT_R11_G11_B10_FLOAT, _mesa_texstore_r11_g11_b10f }, - { MESA_FORMAT_Z32_FLOAT, NULL /* XXX */ }, + { MESA_FORMAT_Z32_FLOAT, _mesa_texstore_z32 }, { MESA_FORMAT_Z32_FLOAT_X24S8, /* XXX */ }, }; From b2f087cd87ab9b1651d221b5c7d7e543a4585d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 05:17:51 +0200 Subject: [PATCH 097/113] mesa: implement texstore for DEPTH32F_STENCIL8 Reviewed-by: Kenneth Graunke --- src/mesa/main/texstore.c | 68 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 777773cc14b..6e1e63bdfb0 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -4287,6 +4287,72 @@ 
_mesa_texstore_r11_g11_b10f(TEXSTORE_PARAMS) } +static GLboolean +_mesa_texstore_z32f_x24s8(TEXSTORE_PARAMS) +{ + ASSERT(dstFormat == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(srcFormat == GL_DEPTH_STENCIL || + srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX); + ASSERT(srcFormat != GL_DEPTH_STENCIL || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + + if (srcFormat == GL_DEPTH_STENCIL && + ctx->Pixel.DepthScale == 1.0f && + ctx->Pixel.DepthBias == 0.0f && + !srcPacking->SwapBytes) { + /* simple path */ + memcpy_texture(ctx, dims, + dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset, + dstRowStride, + dstImageOffsets, + srcWidth, srcHeight, srcDepth, srcFormat, srcType, + srcAddr, srcPacking); + } + else if (srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX) { + GLint img, row; + const GLint srcRowStride + = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType) + / sizeof(uint64_t); + + /* In case we only upload depth we need to preserve the stencil */ + for (img = 0; img < srcDepth; img++) { + uint64_t *dstRow = (uint64_t *) dstAddr + + dstImageOffsets[dstZoffset + img] + + dstYoffset * dstRowStride / sizeof(uint64_t) + + dstXoffset; + const uint64_t *src + = (const uint64_t *) _mesa_image_address(dims, srcPacking, srcAddr, + srcWidth, srcHeight, + srcFormat, srcType, + img, 0, 0); + for (row = 0; row < srcHeight; row++) { + /* The unpack functions with: + * dstType = GL_FLOAT_32_UNSIGNED_INT_24_8_REV + * only write their own dword, so the other dword (stencil + * or depth) is preserved. 
*/ + if (srcFormat != GL_STENCIL_INDEX) + _mesa_unpack_depth_span(ctx, srcWidth, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV, /* dst type */ + dstRow, /* dst addr */ + 1.0f, srcType, src, srcPacking); + + if (srcFormat != GL_DEPTH_COMPONENT) + _mesa_unpack_stencil_span(ctx, srcWidth, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV, /* dst type */ + dstRow, /* dst addr */ + srcType, src, srcPacking, + ctx->_ImageTransferState); + + src += srcRowStride; + dstRow += dstRowStride / sizeof(uint64_t); + } + } + } + return GL_TRUE; +} + /** * Table mapping MESA_FORMAT_* to _mesa_texstore_*() @@ -4423,7 +4489,7 @@ texstore_funcs[MESA_FORMAT_COUNT] = { MESA_FORMAT_R11_G11_B10_FLOAT, _mesa_texstore_r11_g11_b10f }, { MESA_FORMAT_Z32_FLOAT, _mesa_texstore_z32 }, - { MESA_FORMAT_Z32_FLOAT_X24S8, /* XXX */ }, + { MESA_FORMAT_Z32_FLOAT_X24S8, _mesa_texstore_z32f_x24s8 }, }; From bde6a044588401ebbd14881cd5621095c221f0a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 14:18:25 +0200 Subject: [PATCH 098/113] mesa: implement generatemipmap for GL_FLOAT_32_UNSIGNED_INT_24_8_REV Reviewed-by: Kenneth Graunke --- src/mesa/main/mipmap.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index f2724dbca7e..8a811cb7225 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -706,6 +706,17 @@ do_row(GLenum datatype, GLuint comps, GLint srcWidth, } } + else if (datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV && comps == 1) { + GLuint i, j, k; + const GLfloat *rowA = (const GLfloat *) srcRowA; + const GLfloat *rowB = (const GLfloat *) srcRowB; + GLfloat *dst = (GLfloat *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i*2] = (rowA[j*2] + rowA[k*2] + rowB[j*2] + rowB[k*2]) * 0.25F; + } + } + else { _mesa_problem(NULL, "bad format in do_row()"); } @@ -1341,6 +1352,15 @@ do_row_3D(GLenum datatype, GLuint comps, GLint srcWidth, } } + else if 
(datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV && comps == 1) { + DECLARE_ROW_POINTERS(GLfloat, 2); + + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + } + } + else { _mesa_problem(NULL, "bad format in do_row()"); } From ba15e8260ef6697fcd8c1f6ab098469db6fe78ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 1 Jul 2011 02:04:34 +0200 Subject: [PATCH 099/113] mesa: implement depth/stencil renderbuffer wrapper accessors for Z32F_X24S8 Reviewed-by: Kenneth Graunke --- src/mesa/main/depthstencil.c | 322 ++++++++++++++++++++++++++++++++--- src/mesa/main/depthstencil.h | 5 + src/mesa/main/framebuffer.c | 10 +- 3 files changed, 313 insertions(+), 24 deletions(-) diff --git a/src/mesa/main/depthstencil.c b/src/mesa/main/depthstencil.c index ab62c97fe5a..4d0600050ff 100644 --- a/src/mesa/main/depthstencil.c +++ b/src/mesa/main/depthstencil.c @@ -393,6 +393,217 @@ _mesa_new_z24_renderbuffer_wrapper(struct gl_context *ctx, } +static void +get_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, void *values) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLfloat *dst = (GLfloat *) values; + const GLfloat *src = (const GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + GLuint i; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (!src) { + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + src = temp; + } + for (i = 0; i < count; i++) { + dst[i] = src[i*2]; + } +} + +static void +get_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + const GLint x[], const GLint y[], void *values) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLfloat *dst = (GLfloat *) values; + GLuint i; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType 
== GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(count <= MAX_WIDTH); + /* don't bother trying direct access */ + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + dst[i] = temp[i*2]; + } +} + +static void +put_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, const void *values, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + const GLfloat *src = (const GLfloat *) values; + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dst) { + /* direct access */ + GLuint i; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2] = src[i]; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = src[i]; + } + } + dsrb->PutRow(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_mono_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, const void *value, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dst) { + /* direct access */ + GLuint i; + const GLfloat val = *(GLfloat*)value; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2] = val; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + const GLfloat val = *(GLfloat *)value; + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { 
+ temp[i*2] = val; + } + } + dsrb->PutRow(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + const GLint x[], const GLint y[], + const void *values, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + const GLfloat *src = (const GLfloat *) values; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dsrb->GetPointer(ctx, dsrb, 0, 0)) { + /* direct access */ + GLuint i; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); + *dst = src[i]; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = src[i]; + } + } + dsrb->PutValues(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_mono_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, + GLuint count, const GLint x[], const GLint y[], + const void *value, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + const GLfloat val = *(GLfloat *)value; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + /* get, modify, put */ + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = val; + } + } + dsrb->PutValues(ctx, dsrb, count, x, y, temp, mask); +} + + +/** + * Wrap the given GL_DEPTH_STENCIL renderbuffer so that it acts like + * a depth renderbuffer. 
+ * \return new depth renderbuffer + */ +struct gl_renderbuffer * +_mesa_new_z32f_renderbuffer_wrapper(struct gl_context *ctx, + struct gl_renderbuffer *dsrb) +{ + struct gl_renderbuffer *z32frb; + + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + + z32frb = ctx->Driver.NewRenderbuffer(ctx, 0); + if (!z32frb) + return NULL; + + /* NOTE: need to do manual refcounting here */ + z32frb->Wrapped = dsrb; + dsrb->RefCount++; + + z32frb->Name = dsrb->Name; + z32frb->RefCount = 0; + z32frb->Width = dsrb->Width; + z32frb->Height = dsrb->Height; + z32frb->RowStride = dsrb->RowStride; + z32frb->InternalFormat = GL_DEPTH_COMPONENT32F; + z32frb->Format = MESA_FORMAT_Z32_FLOAT; + z32frb->_BaseFormat = GL_DEPTH_COMPONENT; + z32frb->DataType = GL_FLOAT; + z32frb->Data = NULL; + z32frb->Delete = delete_wrapper; + z32frb->AllocStorage = alloc_wrapper_storage; + z32frb->GetPointer = nop_get_pointer; + z32frb->GetRow = get_row_z32f; + z32frb->GetValues = get_values_z32f; + z32frb->PutRow = put_row_z32f; + z32frb->PutRowRGB = NULL; + z32frb->PutMonoRow = put_mono_row_z32f; + z32frb->PutValues = put_values_z32f; + z32frb->PutMonoValues = put_mono_values_z32f; + + return z32frb; +} + + /*====================================================================== * Stencil wrapper around depth/stencil renderbuffer */ @@ -402,16 +613,22 @@ get_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, GLint x, GLint y, void *values) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; GLubyte *dst = (GLubyte *) values; const GLuint *src = (const GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (!src) { dsrb->GetRow(ctx, dsrb, count, x, y, temp); src = 
temp; } - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + dst[i] = src[i*2+1] & 0xff; + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { dst[i] = src[i] & 0xff; } @@ -429,14 +646,20 @@ get_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count const GLint x[], const GLint y[], void *values) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; GLubyte *dst = (GLubyte *) values; ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); ASSERT(count <= MAX_WIDTH); /* don't bother trying direct access */ dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + dst[i] = temp[i*2+1] & 0xff; + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { dst[i] = temp[i] & 0xff; } @@ -457,11 +680,19 @@ put_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, const GLubyte *src = (const GLubyte *) values; GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dst) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { dst[i] = (dst[i] & 0xffffff00) | src[i]; @@ -479,9 +710,16 @@ 
put_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetRow(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | src[i]; @@ -508,11 +746,19 @@ put_mono_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint cou const GLubyte val = *((GLubyte *) value); GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dst) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { dst[i] = (dst[i] & 0xffffff00) | val; @@ -530,9 +776,16 @@ put_mono_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint cou } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetRow(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | val; @@ -559,11 +812,20 @@ put_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, 
GLuint count struct gl_renderbuffer *dsrb = s8rb->Wrapped; const GLubyte *src = (const GLubyte *) values; ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dsrb->GetPointer(ctx, dsrb, 0, 0)) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); + dst[1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); @@ -583,9 +845,16 @@ put_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | src[i]; @@ -610,11 +879,18 @@ put_mono_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint const void *value, const GLubyte *mask) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; const GLubyte val = *((GLubyte *) value); /* get, modify, put */ dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i 
< count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | val; @@ -644,8 +920,10 @@ _mesa_new_s8_renderbuffer_wrapper(struct gl_context *ctx, struct gl_renderbuffer struct gl_renderbuffer *s8rb; ASSERT(dsrb->Format == MESA_FORMAT_Z24_S8 || - dsrb->Format == MESA_FORMAT_S8_Z24); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + dsrb->Format == MESA_FORMAT_S8_Z24 || + dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); s8rb = ctx->Driver.NewRenderbuffer(ctx, 0); if (!s8rb) diff --git a/src/mesa/main/depthstencil.h b/src/mesa/main/depthstencil.h index ef63c5d7a31..b47a2e482c2 100644 --- a/src/mesa/main/depthstencil.h +++ b/src/mesa/main/depthstencil.h @@ -33,6 +33,11 @@ _mesa_new_z24_renderbuffer_wrapper(struct gl_context *ctx, struct gl_renderbuffer *dsrb); +extern struct gl_renderbuffer * +_mesa_new_z32f_renderbuffer_wrapper(struct gl_context *ctx, + struct gl_renderbuffer *dsrb); + + extern struct gl_renderbuffer * _mesa_new_s8_renderbuffer_wrapper(struct gl_context *ctx, struct gl_renderbuffer *dsrb); diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 66c9bd91096..6e2ce74212e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -631,8 +631,14 @@ _mesa_update_depth_buffer(struct gl_context *ctx, || fb->_DepthBuffer->Wrapped != depthRb || _mesa_get_format_base_format(fb->_DepthBuffer->Format) != GL_DEPTH_COMPONENT) { /* need to update wrapper */ - struct gl_renderbuffer *wrapper - = _mesa_new_z24_renderbuffer_wrapper(ctx, depthRb); + struct gl_renderbuffer *wrapper; + + if (depthRb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + wrapper = _mesa_new_z32f_renderbuffer_wrapper(ctx, depthRb); + } + else { + wrapper = _mesa_new_z24_renderbuffer_wrapper(ctx, depthRb); + } _mesa_reference_renderbuffer(&fb->_DepthBuffer, wrapper); ASSERT(fb->_DepthBuffer->Wrapped == depthRb); } From 
adea7ea0bc2683c0ee544e20074062df9ae5a72b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 1 Jun 2011 15:49:33 +0200 Subject: [PATCH 100/113] st/mesa: initial ARB_depth_buffer_float support --- src/mesa/state_tracker/st_cb_clear.c | 6 ++++-- src/mesa/state_tracker/st_extensions.c | 11 +++++++++++ src/mesa/state_tracker/st_format.c | 19 +++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 181fedd2b99..117000ba716 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -381,7 +381,8 @@ check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuff assert(rb->Format == MESA_FORMAT_S8 || rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); + rb->Format == MESA_FORMAT_S8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); if (ctx->Scissor.Enabled && (ctx->Scissor.X != 0 || @@ -436,7 +437,8 @@ check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb assert(rb->Format == MESA_FORMAT_S8 || rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); + rb->Format == MESA_FORMAT_S8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); if (maskStencil) return GL_TRUE; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index d3aebe526dd..99b231d9706 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -607,4 +607,15 @@ void st_init_extensions(struct st_context *st) if (screen->get_param(screen, PIPE_CAP_SM3)) { ctx->Extensions.ARB_shader_texture_lod = GL_TRUE; } + + if (screen->is_format_supported(screen, PIPE_FORMAT_Z32_FLOAT, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_SAMPLER_VIEW) && + screen->is_format_supported(screen, PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_DEPTH_STENCIL | + 
PIPE_BIND_SAMPLER_VIEW)) { + ctx->Extensions.ARB_depth_buffer_float = GL_TRUE; + } } diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index d1995f1ee1d..bd4f0860c52 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -95,6 +95,9 @@ st_format_datatype(enum pipe_format format) format == PIPE_FORMAT_X8Z24_UNORM) { return GL_UNSIGNED_INT_24_8; } + else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) { + return GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + } else { const GLuint size = format_max_bits(format); @@ -205,6 +208,10 @@ st_mesa_format_to_pipe_format(gl_format mesaFormat) return PIPE_FORMAT_Z24X8_UNORM; case MESA_FORMAT_S8: return PIPE_FORMAT_S8_USCALED; + case MESA_FORMAT_Z32_FLOAT: + return PIPE_FORMAT_Z32_FLOAT; + case MESA_FORMAT_Z32_FLOAT_X24S8: + return PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED; case MESA_FORMAT_YCBCR: return PIPE_FORMAT_UYVY; #if FEATURE_texture_s3tc @@ -427,6 +434,10 @@ st_pipe_format_to_mesa_format(enum pipe_format format) return MESA_FORMAT_X8_Z24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return MESA_FORMAT_S8_Z24; + case PIPE_FORMAT_Z32_FLOAT: + return MESA_FORMAT_Z32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return MESA_FORMAT_Z32_FLOAT_X24S8; case PIPE_FORMAT_UYVY: return MESA_FORMAT_YCBCR; @@ -784,6 +795,10 @@ static const struct format_mapping format_map[] = { { GL_DEPTH_COMPONENT, 0 }, { DEFAULT_DEPTH_FORMATS } }, + { + { GL_DEPTH_COMPONENT32F, 0 }, + { PIPE_FORMAT_Z32_FLOAT, 0 } + }, /* stencil formats */ { @@ -800,6 +815,10 @@ static const struct format_mapping format_map[] = { { GL_DEPTH_STENCIL_EXT, GL_DEPTH24_STENCIL8_EXT, 0 }, { PIPE_FORMAT_Z24_UNORM_S8_USCALED, PIPE_FORMAT_S8_USCALED_Z24_UNORM, 0 } }, + { + { GL_DEPTH32F_STENCIL8, 0 }, + { PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, 0 } + }, /* sRGB formats */ { From e517e5ac7c89b133f341a1075db95e14d0ba23c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 27 Jun 2011 19:09:24 
+0200 Subject: [PATCH 101/113] st/mesa: implement read/draw/copypixels for Z32F and Z32F_S8X24 --- src/mesa/state_tracker/st_cb_drawpixels.c | 64 +++++++++++++++++++---- src/mesa/state_tracker/st_cb_readpixels.c | 43 +++++++++++++++ 2 files changed, 98 insertions(+), 9 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index d61d7ac22be..dca3324645c 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -812,6 +812,7 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, for (row = 0; row < height; row++) { GLubyte sValues[MAX_WIDTH]; GLuint zValues[MAX_WIDTH]; + GLfloat *zValuesFloat = (GLfloat*)zValues; GLenum destType = GL_UNSIGNED_BYTE; const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels, width, height, @@ -822,7 +823,11 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, ctx->_ImageTransferState); if (format == GL_DEPTH_STENCIL) { - _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues, + GLenum ztype = + pt->resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED ? 
+ GL_FLOAT : GL_UNSIGNED_INT; + + _mesa_unpack_depth_span(ctx, spanWidth, ztype, zValues, (1 << 24) - 1, type, source, &clippedUnpack); } @@ -887,6 +892,26 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, } } break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + if (format == GL_DEPTH_STENCIL) { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLfloat *destf = (GLfloat*)dest; + GLint k; + assert(usage == PIPE_TRANSFER_WRITE); + for (k = 0; k < spanWidth; k++) { + destf[k*2] = zValuesFloat[k]; + dest[k*2+1] = sValues[k] & 0xff; + } + } + else { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k*2+1] = sValues[k] & 0xff; + } + } + break; default: assert(0); } @@ -994,14 +1019,23 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, GL_NONE, GL_NONE, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); - if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) - stencil_format = PIPE_FORMAT_X24S8_USCALED; - else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) - stencil_format = PIPE_FORMAT_S8X24_USCALED; - else - stencil_format = PIPE_FORMAT_S8_USCALED; - if (stencil_format == PIPE_FORMAT_NONE) - goto stencil_fallback; + + switch (tex_format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + stencil_format = PIPE_FORMAT_X24S8_USCALED; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + stencil_format = PIPE_FORMAT_S8X24_USCALED; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + stencil_format = PIPE_FORMAT_X32_S8X24_USCALED; + break; + case PIPE_FORMAT_S8_USCALED: + stencil_format = PIPE_FORMAT_S8_USCALED; + break; + default: + goto stencil_fallback; + } } /* Mesa state should be up to date by now */ @@ -1188,6 +1222,18 @@ copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, assert(usage == PIPE_TRANSFER_WRITE); memcpy(dst, src, width); break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + { + uint *dst4 = (uint *) 
dst; + int j; + dst4++; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (j = 0; j < width; j++) { + *dst4 = src[j] & 0xff; + dst4 += 2; + } + } + break; default: assert(0); } diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 67926e39297..02ddad7b2f0 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -151,6 +151,24 @@ st_read_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, } } break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + if (format == GL_DEPTH_STENCIL) { + const uint *src = (uint *) (stmap + srcY * pt->stride); + const GLfloat *srcf = (const GLfloat*)src; + GLint k; + for (k = 0; k < width; k++) { + zValues[k] = srcf[k*2]; + sValues[k] = src[k*2+1] & 0xff; + } + } + else { + const uint *src = (uint *) (stmap + srcY * pt->stride); + GLint k; + for (k = 0; k < width; k++) { + sValues[k] = src[k*2+1] & 0xff; + } + } + break; default: assert(0); } @@ -568,6 +586,31 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei h dst += dstStride; } } + else if (pformat == PIPE_FORMAT_Z32_FLOAT) { + for (i = 0; i < height; i++) { + GLfloat zfloat[MAX_WIDTH]; + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, zfloat, 0); + y += yStep; + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } + else if (pformat == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) { + assert(format == GL_DEPTH_COMPONENT); + for (i = 0; i < height; i++) { + GLfloat zfloat[MAX_WIDTH]; /* Z32 */ + GLfloat zfloat2[MAX_WIDTH*2]; /* Z32X32 */ + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, zfloat2, 0); + y += yStep; + for (j = 0; j < width; j++) { + zfloat[j] = zfloat2[j*2]; + } + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } else { /* RGBA format */ /* Do a row at a time to flip image data vertically */ From 8ff6f90c3f138fbe922d1eee6cecffc87234ef34 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 24 Jun 2011 23:38:36 +0200 Subject: [PATCH 102/113] gallium/util: implement pack functions for Z32F and Z32F_S8X24 --- src/gallium/auxiliary/util/u_pack_color.h | 64 +++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 5378f2d782f..9391f1b80e0 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -458,6 +458,19 @@ util_pack_mask_z(enum pipe_format format, uint32_t z) } } + +static INLINE uint64_t +util_pack64_mask_z(enum pipe_format format, uint32_t z) +{ + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return z; + default: + return util_pack_mask_z(format, z); + } +} + + static INLINE uint32_t util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) { @@ -481,6 +494,21 @@ util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) } +static INLINE uint64_t +util_pack64_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) +{ + uint64_t packed; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + packed = util_pack64_mask_z(format, z); + packed |= (uint64_t)s << 32ull; + return packed; + default: + return util_pack_mask_z_stencil(format, z, s); + } +} + /** * Note: it's assumed that z is in [0,1] @@ -525,6 +553,24 @@ util_pack_z(enum pipe_format format, double z) return 0; } } + + +static INLINE uint64_t +util_pack64_z(enum pipe_format format, double z) +{ + union fi fui; + + if (z == 0) + return 0; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + fui.f = (float)z; + return fui.ui; + default: + return util_pack_z(format, z); + } +} /** @@ -554,6 +600,24 @@ util_pack_z_stencil(enum pipe_format format, double z, uint8_t s) } +static INLINE uint64_t +util_pack64_z_stencil(enum pipe_format format, double z, uint8_t s) +{ + uint64_t packed; + + switch 
(format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + packed = util_pack64_z(format, z); + packed |= (uint64_t)s << 32ull; + break; + default: + return util_pack_z_stencil(format, z, s); + } + + return packed; +} + + /** * Pack 4 ubytes into a 4-byte word */ From e860cb64dbcb170207641fc280e47858fae74891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 24 Jun 2011 23:39:51 +0200 Subject: [PATCH 103/113] gallium/util: implement software Z32F_S8X24 depth-stencil clear --- src/gallium/auxiliary/util/u_surface.c | 35 +++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 4c5cc4da182..8e123867da6 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -358,8 +358,41 @@ util_clear_depth_stencil(struct pipe_context *pipe, dst_map += dst_stride; } } - break; + break; case 8: + { + uint64_t zstencil = util_pack64_z_stencil(dst->texture->format, + depth, stencil); + + assert(dst->format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED); + + if (!need_rmw) { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst_map; + for (j = 0; j < width; j++) + *row++ = zstencil; + dst_map += dst_stride; + } + } + else { + uint64_t src_mask; + + if (clear_flags & PIPE_CLEAR_DEPTH) + src_mask = 0x00000000ffffffffull; + else + src_mask = 0x000000ff00000000ull; + + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst_map; + for (j = 0; j < width; j++) { + uint64_t tmp = *row & ~src_mask; + *row++ = tmp | (zstencil & src_mask); + } + dst_map += dst_stride; + } + } + break; + } default: assert(0); break; From d9ab6712ccb5a7249feaaf071fefdc78e80808a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 21:14:07 +0200 Subject: [PATCH 104/113] gallium/util: handle Z32F_FLOAT_S8X24_USCALED in pipe_tile_raw_to_rgba And make pipe_put_tile_rgba_format no-op 
like the other Z formats. --- src/gallium/auxiliary/util/u_tile.c | 35 +++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index e3c7085ba92..23f12e5f464 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -318,6 +318,32 @@ z32f_get_tile_rgba(const float *src, } } +/*** PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z32f_x24s8_get_tile_rgba(const float *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = *src; + src += 2; + } + p += dst_stride; + } +} + void pipe_tile_raw_to_rgba(enum pipe_format format, @@ -352,6 +378,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_Z32_FLOAT: z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + z32f_x24s8_get_tile_rgba((float *) src, w, h, dst, dst_stride); + break; default: util_format_read_4f(format, dst, dst_stride * sizeof(float), @@ -445,6 +474,12 @@ pipe_put_tile_rgba_format(struct pipe_context *pipe, case PIPE_FORMAT_X8Z24_UNORM: /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; + case PIPE_FORMAT_Z32_FLOAT: + /*z32f_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + /*z32f_s8x24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; default: util_format_write_4f(format, p, src_stride * sizeof(float), From 89954723bfeef59d055d2332ff112f0204b48130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 20 Jun 2011 19:40:41 +0200 Subject: [PATCH 105/113] r600g: depth_buffer_float support on r600-r700 --- src/gallium/drivers/r600/r600_state.c | 12 
++++++++++++ src/gallium/drivers/r600/r600_texture.c | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 203b39f855f..01406f2bad6 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -263,6 +263,10 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) return V_028010_DEPTH_X8_24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028010_DEPTH_8_24; + case PIPE_FORMAT_Z32_FLOAT: + return V_028010_DEPTH_32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028010_DEPTH_X24_8_32_FLOAT; default: return ~0U; } @@ -353,6 +357,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16_UNORM: case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: return V_0280A0_SWAP_STD; /* 64-bit buffers. */ @@ -360,6 +365,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -444,7 +450,11 @@ static uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return V_0280A0_COLOR_24_8; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_0280A0_COLOR_X24_8_32_FLOAT; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: return V_0280A0_COLOR_32_FLOAT; case PIPE_FORMAT_R16G16_FLOAT: @@ -532,6 +542,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_32_32_FLOAT: case V_0280A0_COLOR_32_32: + case V_0280A0_COLOR_X24_8_32_FLOAT: return ENDIAN_8IN32; /* 128-bit buffers. 
*/ @@ -635,6 +646,7 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) offset_units *= 2.0f; break; case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: depth = -23; offset_units *= 1.0f; offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 18460419f85..37e75be6cf2 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -851,6 +851,12 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, result = FMT_8; word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); goto out_word4; + case PIPE_FORMAT_Z32_FLOAT: + result = FMT_32_FLOAT; + goto out_word4; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + result = FMT_X24_8_32_FLOAT; + goto out_word4; default: goto out_unknown; } From 3414447011b6b25aeab22f4949a96c09cf4c5098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 29 Jun 2011 02:10:55 +0200 Subject: [PATCH 106/113] docs: update GL3 status --- docs/GL3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 49b48472a4a..135bc4bab67 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -19,7 +19,7 @@ Clamping controls (GL_ARB_color_buffer_float) DONE Float textures, renderbuffers (GL_ARB_texture_float) DONE (gallium r300) GL_EXT_packed_float DONE (gallium r600) GL_EXT_texture_shared_exponent DONE (gallium, swrast) -Float depth buffers (GL_ARB_depth_buffer_float) not started +Float depth buffers (GL_ARB_depth_buffer_float) DONE Framebuffer objects (GL_EXT_framebuffer_object) DONE Half-float DONE Multisample blit DONE From 83478e5d5944e1fc320e8cfb10ba75055bbea3fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 Jul 2011 20:01:33 +0200 Subject: [PATCH 107/113] mesa: return early if mask is cleared to zero in BlitFramebuffer From ARB_framebuffer_object: If a buffer is 
specified in <mask> and does not exist in both the read and draw framebuffers, the corresponding bit is silently ignored. --- src/mesa/main/fbobject.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index d094dd35a69..84969360d92 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2603,6 +2603,10 @@ _mesa_BlitFramebufferEXT(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, } } + if (!mask) { + return; + } + ASSERT(ctx->Driver.BlitFramebuffer); ctx->Driver.BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1, From d1214cca084f277b5acc913490d354edbd4b990f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 Jul 2011 20:03:05 +0200 Subject: [PATCH 108/113] swrast: fix depth/stencil blits when there's no colorbuffer NOTE: This is a candidate for the 7.10 and 7.11 branches. --- src/mesa/swrast/s_blit.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mesa/swrast/s_blit.c b/src/mesa/swrast/s_blit.c index 3516a41bf41..7f53f19eb62 100644 --- a/src/mesa/swrast/s_blit.c +++ b/src/mesa/swrast/s_blit.c @@ -568,9 +568,6 @@ _swrast_BlitFramebuffer(struct gl_context *ctx, }; GLint i; - if (!ctx->DrawBuffer->_NumColorDrawBuffers) - return; - if (!_mesa_clip_blit(ctx, &srcX0, &srcY0, &srcX1, &srcY1, &dstX0, &dstY0, &dstX1, &dstY1)) { return; From 91a52dae97379d118965567b5c11e393996baeb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 23:22:37 +0200 Subject: [PATCH 109/113] WIP r600g: depth_buffer_float renderbuffer support on evergreen --- src/gallium/drivers/r600/evergreen_state.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index fbf25feaf20..acc591f2d6e 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -265,6 +265,9 @@ static uint32_t
r600_translate_dbformat(enum pipe_format format) return V_028040_Z_24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028040_Z_24; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028040_Z_32_FLOAT; default: return ~0U; } @@ -272,7 +275,8 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) static uint32_t r600_translate_stencilformat(enum pipe_format format) { - if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || + format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) return 1; else return 0; @@ -360,6 +364,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R11G11B10_FLOAT: case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; @@ -369,6 +374,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -453,7 +459,11 @@ static uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return V_028C70_COLOR_24_8; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028C70_COLOR_X24_8_32_FLOAT; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: return V_028C70_COLOR_32_FLOAT; case PIPE_FORMAT_R16G16_FLOAT: @@ -541,6 +551,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_32_FLOAT: case V_028C70_COLOR_32_32: + case V_028C70_COLOR_X24_8_32_FLOAT: return ENDIAN_8IN32; /* 96-bit buffers. 
*/ @@ -2123,6 +2134,7 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) offset_units *= 2.0f; break; case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: depth = -23; offset_units *= 1.0f; offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); From 01f48a979d85525acd060c8055ec835a1b56ea87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 01:39:53 +0200 Subject: [PATCH 110/113] mesa: implement packing of DEPTH_STENCIL & FLOAT_32_UNSIGNED_INT_24_8_REV combo Tested with the new piglit fbo-depthstencil test. --- src/mesa/main/pack.c | 21 ++++++++++++++++----- src/mesa/main/pack.h | 4 ++-- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/swrast/s_readpix.c | 2 +- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index d42ae7bf0f4..7de1d05b919 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -5056,10 +5056,11 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest, /** - * Pack depth and stencil values as GL_DEPTH_STENCIL/GL_UNSIGNED_INT_24_8. 
+ * Pack depth and stencil values as GL_DEPTH_STENCIL (GL_UNSIGNED_INT_24_8 etc) */ void -_mesa_pack_depth_stencil_span(struct gl_context *ctx, GLuint n, GLuint *dest, +_mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n, + GLenum dstType, GLuint *dest, const GLfloat *depthVals, const GLstencil *stencilVals, const struct gl_pixelstore_attrib *dstPacking) @@ -5089,9 +5090,19 @@ _mesa_pack_depth_stencil_span(struct gl_context *ctx, GLuint n, GLuint *dest, stencilVals = stencilCopy; } - for (i = 0; i < n; i++) { - GLuint z = (GLuint) (depthVals[i] * 0xffffff); - dest[i] = (z << 8) | (stencilVals[i] & 0xff); + switch (dstType) { + case GL_UNSIGNED_INT_24_8: + for (i = 0; i < n; i++) { + GLuint z = (GLuint) (depthVals[i] * 0xffffff); + dest[i] = (z << 8) | (stencilVals[i] & 0xff); + } + break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + for (i = 0; i < n; i++) { + ((GLfloat*)dest)[i*2] = depthVals[i]; + dest[i*2+1] = stencilVals[i] & 0xff; + } + break; } if (dstPacking->SwapBytes) { diff --git a/src/mesa/main/pack.h b/src/mesa/main/pack.h index 78238ea5839..00aab409e42 100644 --- a/src/mesa/main/pack.h +++ b/src/mesa/main/pack.h @@ -130,8 +130,8 @@ _mesa_pack_depth_span(struct gl_context *ctx, GLuint n, GLvoid *dest, extern void -_mesa_pack_depth_stencil_span(struct gl_context *ctx, - GLuint n, GLuint *dest, +_mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n, + GLenum dstType, GLuint *dest, const GLfloat *depthVals, const GLstencil *stencilVals, const struct gl_pixelstore_attrib *dstPacking); diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 02ddad7b2f0..e2b29fe3068 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -177,7 +177,7 @@ st_read_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, dest = _mesa_image_address2d(packing, pixels, width, height, format, type, j, 0); if (format == GL_DEPTH_STENCIL) { - 
_mesa_pack_depth_stencil_span(ctx, width, dest, + _mesa_pack_depth_stencil_span(ctx, width, type, dest, zValues, sValues, packing); } else { diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c index 214f2ea1aaa..66ca39293a6 100644 --- a/src/mesa/swrast/s_readpix.c +++ b/src/mesa/swrast/s_readpix.c @@ -446,7 +446,7 @@ read_depth_stencil_pixels(struct gl_context *ctx, GLfloat depthVals[MAX_WIDTH]; _swrast_read_depth_span_float(ctx, depthRb, width, x, y + i, depthVals); - _mesa_pack_depth_stencil_span(ctx, width, depthStencilDst, + _mesa_pack_depth_stencil_span(ctx, width, type, depthStencilDst, depthVals, stencilVals, packing); } } From daf6604435594b2ec861a40eaf4c5a23c97c0714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 04:01:06 +0200 Subject: [PATCH 111/113] r600g: zero memory of ioctl parameters Fixes valgrind warning. --- src/gallium/winsys/r600/drm/r600_drm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 4602f7f2a4b..fa2d77ed464 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -186,7 +186,7 @@ static int eg_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) static int radeon_drm_get_tiling(struct radeon *radeon) { - struct drm_radeon_info info; + struct drm_radeon_info info = {}; int r; uint32_t tiling_config = 0; @@ -208,7 +208,7 @@ static int radeon_drm_get_tiling(struct radeon *radeon) static int radeon_get_clock_crystal_freq(struct radeon *radeon) { - struct drm_radeon_info info; + struct drm_radeon_info info = {}; uint32_t clock_crystal_freq; int r; @@ -226,7 +226,7 @@ static int radeon_get_clock_crystal_freq(struct radeon *radeon) static int radeon_get_num_backends(struct radeon *radeon) { - struct drm_radeon_info info; + struct drm_radeon_info info = {}; uint32_t num_backends; int r; From 
dc9d789d1b39f9702c179a1d60f76535352563df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 04:13:35 +0200 Subject: [PATCH 112/113] r600g: more valgrind fixes --- src/gallium/winsys/r600/drm/r600_drm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index fa2d77ed464..b5a4d928bf5 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -209,7 +209,7 @@ static int radeon_drm_get_tiling(struct radeon *radeon) static int radeon_get_clock_crystal_freq(struct radeon *radeon) { struct drm_radeon_info info = {}; - uint32_t clock_crystal_freq; + uint32_t clock_crystal_freq = 0; int r; info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ; @@ -227,7 +227,7 @@ static int radeon_get_clock_crystal_freq(struct radeon *radeon) static int radeon_get_num_backends(struct radeon *radeon) { struct drm_radeon_info info = {}; - uint32_t num_backends; + uint32_t num_backends = 0; int r; info.request = RADEON_INFO_NUM_BACKENDS; From 12265d26ddc72f62de927ac24e12ab41fcd8d1c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 04:17:37 +0200 Subject: [PATCH 113/113] Revert "WIP r600g: depth_buffer_float renderbuffer support on evergreen" This reverts commit 91a52dae97379d118965567b5c11e393996baeb9. Pushed accidentally. 
--- src/gallium/drivers/r600/evergreen_state.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index acc591f2d6e..fbf25feaf20 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -265,9 +265,6 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) return V_028040_Z_24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028040_Z_24; - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: - return V_028040_Z_32_FLOAT; default: return ~0U; } @@ -275,8 +272,7 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) static uint32_t r600_translate_stencilformat(enum pipe_format format) { - if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || - format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) + if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) return 1; else return 0; @@ -364,7 +360,6 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R11G11B10_FLOAT: case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; @@ -374,7 +369,6 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: /* 128-bit buffers. 
*/ case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -459,11 +453,7 @@ static uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return V_028C70_COLOR_24_8; - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: - return V_028C70_COLOR_X24_8_32_FLOAT; - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT: return V_028C70_COLOR_32_FLOAT; case PIPE_FORMAT_R16G16_FLOAT: @@ -551,7 +541,6 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_32_FLOAT: case V_028C70_COLOR_32_32: - case V_028C70_COLOR_X24_8_32_FLOAT: return ENDIAN_8IN32; /* 96-bit buffers. */ @@ -2134,7 +2123,6 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) offset_units *= 2.0f; break; case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: depth = -23; offset_units *= 1.0f; offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);