From 7eb7d67d50fccb64248d1fc6f490895048d7d32e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 7 Jul 2011 16:47:59 -0600 Subject: [PATCH 001/110] glsl: use casts to silence warning --- src/glsl/linker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 265da84e5a9..34b64837a46 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1248,7 +1248,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog, */ const int generic_base = (target_index == MESA_SHADER_VERTEX) - ? VERT_ATTRIB_GENERIC0 : FRAG_RESULT_DATA0; + ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0; const enum ir_variable_mode direction = (target_index == MESA_SHADER_VERTEX) ? ir_var_in : ir_var_out; From d8f65c07e9f3a5948c8bee95482bcab651b33c01 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 7 Jul 2011 17:29:28 -0600 Subject: [PATCH 002/110] intel: add null src pointer check in intel_region_reference() Fixes segfault when running cubemap demo on i945. This happened when intel_region_reference() was called in i915_set_draw_region() with depth_region=NULL. Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/intel/intel_regions.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 6cf37c4c40c..4c4945c7941 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -271,7 +271,8 @@ intel_region_reference(struct intel_region **dst, struct intel_region *src) if (*dst) intel_region_release(dst); - src->refcount++; + if (src) + src->refcount++; *dst = src; } } From 8c1a2e128ea44bae6dc89524116c4a170cdc2b37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Thu, 7 Jul 2011 21:58:31 -0700 Subject: [PATCH 003/110] i915g: Make the optimizer more generic. --- src/gallium/drivers/i915/i915_fpc_optimize.c | 85 +++++++++++++------- 1 file changed, 58 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index e60c27e5473..2b739e9ccb8 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -55,18 +55,15 @@ static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_ } static boolean is_unswizzled(struct i915_full_src_register* r, - int sx, - int sy, - int sz, - int sw) + unsigned write_mask) { - if (sx && r->Register.SwizzleX != TGSI_SWIZZLE_X) + if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) return FALSE; - if (sy && r->Register.SwizzleY != TGSI_SWIZZLE_Y) + if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y) return FALSE; - if (sz && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) + if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) return FALSE; - if (sw && r->Register.SwizzleW != TGSI_SWIZZLE_W) + if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W) return FALSE; return TRUE; } @@ -89,6 +86,35 @@ static unsigned op_neutral_element(unsigned opcode) return TGSI_SWIZZLE_ZERO; } +/* + * Sets the swizzle to the neutral element for the operation for the bits + * of writemask which are set, swizzle to identity otherwise. + */ +static void set_neutral_element_swizzle(struct i915_full_src_register* r, + unsigned write_mask, + unsigned neutral) +{ + if ( write_mask & TGSI_WRITEMASK_X ) + r->Register.SwizzleX = neutral; + else + r->Register.SwizzleX = TGSI_SWIZZLE_X; + + if ( write_mask & TGSI_WRITEMASK_Y ) + r->Register.SwizzleY = neutral; + else + r->Register.SwizzleY = TGSI_SWIZZLE_Y; + + if ( write_mask & TGSI_WRITEMASK_Z ) + r->Register.SwizzleZ = neutral; + else + r->Register.SwizzleZ = TGSI_SWIZZLE_Z; + + if ( write_mask & TGSI_WRITEMASK_W ) + r->Register.SwizzleW = neutral; + else + r->Register.SwizzleW = TGSI_SWIZZLE_W; +} + /* * Optimize away things like: * MUL OUT[0].xyz, TEMP[1], TEMP[2] @@ -96,47 +122,52 @@ static unsigned op_neutral_element(unsigned opcode) * into: * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] * This is useful for optimizing texenv. - * XXX also handle swizzles other than XYZ/W */ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next) { if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && op_commutes(current->FullInstruction.Instruction.Opcode) && - current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && + current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && - current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && - next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) && - is_unswizzled(¤t->FullInstruction.Src[0], 1, 1, 1, 0) && - is_unswizzled(¤t->FullInstruction.Src[1], 1, 1, 1, 0) && - is_unswizzled(&next->FullInstruction.Src[0], 0, 0, 0, 1) ) + is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) { next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; - current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; - current->FullInstruction.Src[0].Register.SwizzleW = op_neutral_element(current->FullInstruction.Instruction.Opcode); - current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; + + set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0); + set_neutral_element_swizzle(¤t->FullInstruction.Src[0], + next->FullInstruction.Dst[0].Register.WriteMask, + op_neutral_element(current->FullInstruction.Instruction.Opcode)); + + current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | + next->FullInstruction.Dst[0].Register.WriteMask; return; } if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && op_commutes(current->FullInstruction.Instruction.Opcode) && - current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && + current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && - current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && - next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && - is_unswizzled(¤t->FullInstruction.Src[0], 1, 1, 1, 0) && - is_unswizzled(¤t->FullInstruction.Src[1], 1, 1, 1, 0) && - is_unswizzled(&next->FullInstruction.Src[0], 0, 0, 0, 1) ) + is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && + is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) { next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; - current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; - current->FullInstruction.Src[1].Register.SwizzleW = op_neutral_element(current->FullInstruction.Instruction.Opcode); - current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W; + + set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0); + set_neutral_element_swizzle(¤t->FullInstruction.Src[1], + next->FullInstruction.Dst[0].Register.WriteMask, + op_neutral_element(current->FullInstruction.Instruction.Opcode)); + + current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | + next->FullInstruction.Dst[0].Register.WriteMask; return; } } From a65e9706035c0e348307e76fdeeed0910ec8c68e Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 8 Jul 2011 08:26:29 +0200 Subject: [PATCH 004/110] mesa/st: Fix piglit read-front with new drawable invalidation v2 When the state tracker adds a front buffer, nothing triggers a validate drawable call, since the state tracker manager is never notified. Force a validate drawable call by invalidating the framebuffer's stamp, so that the window system's renderbuffer (if any) is picked up. This fixes bug 38988 https://bugs.freedesktop.org/show_bug.cgi?id=38988 Signed-off-by: Thomas Hellstrom --- src/mesa/state_tracker/st_manager.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index a8c4b5c3f49..7bd82aae206 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -918,6 +918,15 @@ st_manager_add_color_renderbuffer(struct st_context *st, return FALSE; st_framebuffer_update_attachments(stfb); + + /* + * Force a call to the state tracker manager to validate the + * new renderbuffer. It might be that there is a window system + * renderbuffer available. + */ + if(stfb->iface) + stfb->iface_stamp = p_atomic_read(&stfb->iface->stamp) - 1; + st_invalidate_state(st->ctx, _NEW_BUFFERS); return TRUE; From 292148dc4b18958d4447df7596311bd2f09fd44f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 27 Jun 2011 19:01:25 +0200 Subject: [PATCH 005/110] st/mesa: use the first non-VOID channel in st_format_datatype Otherwise PIPE_FORMAT_X8B8G8R8_UNORM and friends would fail. NOTE: This is a candidate for the 7.10 and 7.11 branches. Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_format.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index fa5d8f5050a..3260297c6c1 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -68,10 +68,18 @@ GLenum st_format_datatype(enum pipe_format format) { const struct util_format_description *desc; + int i; desc = util_format_description(format); assert(desc); + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { if (format == PIPE_FORMAT_B5G5R5A1_UNORM || format == PIPE_FORMAT_B5G6R5_UNORM) { @@ -85,21 +93,26 @@ st_format_datatype(enum pipe_format format) } else { const GLuint size = format_max_bits(format); + + assert(i < 4); + if (i == 4) + return GL_NONE; + if (size == 8) { - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_BYTE; else return GL_BYTE; } else if (size == 16) { - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_SHORT; else return GL_SHORT; } else { assert( size <= 32 ); - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_INT; else return GL_INT; From 7de28e80dcd4239a780b0f5fdc6e61e6e56a68aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 27 Jun 2011 18:57:59 +0200 Subject: [PATCH 006/110] st/mesa: handle float formats in st_format_datatype NOTE: This is a candidate for the 7.11 branch. Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_format.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 3260297c6c1..d1995f1ee1d 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -85,6 +85,10 @@ st_format_datatype(enum pipe_format format) format == PIPE_FORMAT_B5G6R5_UNORM) { return GL_UNSIGNED_SHORT; } + else if (format == PIPE_FORMAT_R11G11B10_FLOAT || + format == PIPE_FORMAT_R9G9B9E5_FLOAT) { + return GL_FLOAT; + } else if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || format == PIPE_FORMAT_S8_USCALED_Z24_UNORM || format == PIPE_FORMAT_Z24X8_UNORM || @@ -105,18 +109,26 @@ st_format_datatype(enum pipe_format format) return GL_BYTE; } else if (size == 16) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + return GL_HALF_FLOAT; if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_SHORT; else return GL_SHORT; } - else { - assert( size <= 32 ); + else if (size <= 32) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + return GL_FLOAT; if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_INT; else return GL_INT; } + else { + assert(size == 64); + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT); + return GL_DOUBLE; + } } } else if (format == PIPE_FORMAT_UYVY) { From fc98444bd58960e6cab28423365923bc7e7af3e1 Mon Sep 17 00:00:00 2001 From: Gustaw Smolarczyk Date: Wed, 6 Jul 2011 23:12:11 +0200 Subject: [PATCH 007/110] gallivm: Fix build with llvm-3.0 LLVM 3.0svn changes pretty rapidly. The change in Target->createMCInstPrinter() signature which inspired commits 40ae214067673edbda79371969d1730b6194d83e and 92e29dc5b0474c073b0f05d60629fc6c3decfca4 has been reverted. Signed-off-by: Gustaw Smolarczyk Signed-off-by: Brian Paul --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 01e660ef7d9..29dfb868d95 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -207,21 +207,13 @@ lp_disassemble(const void* func) } raw_debug_ostream Out; -#if HAVE_LLVM >= 0x0300 - TargetMachine *TM = T->createTargetMachine(Triple, sys::getHostCPUName(), ""); -#else - TargetMachine *TM = T->createTargetMachine(Triple, ""); -#endif #if HAVE_LLVM >= 0x0300 unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); #else int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); #endif -#if HAVE_LLVM >= 0x0300 - OwningPtr Printer( - T->createMCInstPrinter(*TM, AsmPrinterVariant, *AsmInfo)); -#elif HAVE_LLVM >= 0x0208 +#if HAVE_LLVM >= 0x0208 OwningPtr Printer( T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo)); #else @@ -233,6 +225,12 @@ lp_disassemble(const void* func) return; } +#if HAVE_LLVM >= 0x0300 + TargetMachine *TM = T->createTargetMachine(Triple, sys::getHostCPUName(), ""); +#else + TargetMachine *TM = T->createTargetMachine(Triple, ""); +#endif + const TargetInstrInfo *TII = TM->getInstrInfo(); /* From 5fb79fc69f56cf2d8d44e4c6c2d8b862bc631139 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Tue, 5 Jul 2011 11:29:40 -0700 Subject: [PATCH 008/110] glsl: Remove unused function prototypes. No functional change. Remove prototypes for do_mod_to_fract() and do_sub_to_add_neg(), which haven't existed since November 2010. --- src/glsl/ir_optimization.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index dd265673c55..59a040751d9 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -56,10 +56,8 @@ bool do_if_simplification(exec_list *instructions); bool do_discard_simplification(exec_list *instructions); bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0); bool do_mat_op_to_vec(exec_list *instructions); -bool do_mod_to_fract(exec_list *instructions); bool do_noop_swizzle(exec_list *instructions); bool do_structure_splitting(exec_list *instructions); -bool do_sub_to_add_neg(exec_list *instructions); bool do_swizzle_swizzle(exec_list *instructions); bool do_tree_grafting(exec_list *instructions); bool do_vec_index_to_cond_assign(exec_list *instructions); From f4830be938c8fa33086f73cab19a53ab3e14cb9c Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 29 Jun 2011 15:30:40 -0700 Subject: [PATCH 009/110] glsl: Make ir_reader able to read plain (return) statements. Previously ir_reader was only able to handle return of non-void. This patch is necessary in order to allow optimization passes to be tested in isolation. Reviewed-by: Kenneth Graunke --- src/glsl/ir_reader.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp index 30df257be2f..f3a621734ba 100644 --- a/src/glsl/ir_reader.cpp +++ b/src/glsl/ir_reader.cpp @@ -482,19 +482,21 @@ ir_reader::read_return(s_expression *expr) { s_expression *s_retval; - s_pattern pat[] = { "return", s_retval}; - if (!MATCH(expr, pat)) { - ir_read_error(expr, "expected (return )"); + s_pattern return_value_pat[] = { "return", s_retval}; + s_pattern return_void_pat[] = { "return" }; + if (MATCH(expr, return_value_pat)) { + ir_rvalue *retval = read_rvalue(s_retval); + if (retval == NULL) { + ir_read_error(NULL, "when reading return value"); + return NULL; + } + return new(mem_ctx) ir_return(retval); + } else if (MATCH(expr, return_void_pat)) { + return new(mem_ctx) ir_return; + } else { + ir_read_error(expr, "expected (return ) or (return)"); return NULL; } - - ir_rvalue *retval = read_rvalue(s_retval); - if (retval == NULL) { - ir_read_error(NULL, "when reading return value"); - return NULL; - } - - return new(mem_ctx) ir_return(retval); } From e2c748aec5363981a05f21f26a0c4d37ccf6419d Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 29 Jun 2011 10:28:40 -0700 Subject: [PATCH 010/110] glsl: Add explanatory comments to lower_jumps.cpp. No functional change. Reviewed-by: Kenneth Graunke --- src/glsl/lower_jumps.cpp | 336 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 322 insertions(+), 14 deletions(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index dd2601d1aad..da85c6b49c0 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -60,12 +60,76 @@ #include #include "ir.h" +/** + * Enum recording the result of analyzing how control flow might exit + * an IR node. + * + * Each possible value of jump_strength indicates a strictly stronger + * guarantee on control flow than the previous value. + * + * The ordering of strengths roughly reflects the way jumps are + * lowered: jumps with higher strength tend to be lowered to jumps of + * lower strength. Accordingly, strength is used as a heuristic to + * determine which lowering to perform first. + * + * This enum is also used by get_jump_strength() to categorize + * instructions as either break, continue, return, or other. When + * used in this fashion, strength_always_clears_execute_flag is not + * used. + * + * The control flow analysis made by this optimization pass makes two + * simplifying assumptions: + * + * - It ignores discard instructions, since they are lowered by a + * separate pass (lower_discard.cpp). + * + * - It assumes it is always possible for control to flow from a loop + * to the instruction immediately following it. Technically, this + * is not true (since all execution paths through the loop might + * jump back to the top, or return from the function). + * + * Both of these simplifying assumtions are safe, since they can never + * cause reachable code to be incorrectly classified as unreachable; + * they can only do the opposite. + */ enum jump_strength { + /** + * Analysis has produced no guarantee on how control flow might + * exit this IR node. It might fall out the bottom (with or + * without clearing the execute flag, if present), or it might + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. + */ strength_none, + + /** + * The only way control can fall out the bottom of this node is + * through a code path that clears the execute flag. It might also + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. + */ strength_always_clears_execute_flag, + + /** + * Control cannot fall out the bottom of this node. It might + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. + */ strength_continue, + + /** + * Control cannot fall out the bottom of this node, or continue the + * top of the innermost enclosing loop. It can only break out of + * it or return from the function. + */ strength_break, + + /** + * Control cannot fall out the bottom of this node, continue to the + * top of the innermost enclosing loop, or break out of it. It can + * only return from the function. + */ strength_return }; @@ -180,6 +244,27 @@ struct function_record }; struct ir_lower_jumps_visitor : public ir_control_flow_visitor { + /* Postconditions: on exit of any visit() function: + * + * ANALYSIS: this->block.min_strength, + * this->block.may_clear_execute_flag, and + * this->loop.may_set_return_flag are updated to reflect the + * characteristics of the visited statement. + * + * DEAD_CODE_ELIMINATION: If this->block.min_strength is not + * strength_none, the visited node is at the end of its exec_list. + * In other words, any unreachable statements that follow the + * visited statement in its exec_list have been removed. + * + * CONTAINED_JUMPS_LOWERED: If the visited statement contains other + * statements, then should_lower_jump() is false for all of the + * return, break, or continue statements it contains. + * + * Note that visiting a jump does not lower it. That is the + * responsibility of the statement (or function signature) that + * contains the jump. + */ + bool progress; struct function_record function; @@ -220,18 +305,57 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { virtual void visit(class ir_loop_jump * ir) { + /* Eliminate all instructions after each one, since they are + * unreachable. This satisfies the DEAD_CODE_ELIMINATION + * postcondition. + */ truncate_after_instruction(ir); + + /* Set this->block.min_strength based on this instruction. This + * satisfies the ANALYSIS postcondition. It is not necessary to + * update this->block.may_clear_execute_flag or + * this->loop.may_set_return_flag, because an unlowered jump + * instruction can't change any flags. + */ this->block.min_strength = ir->is_break() ? strength_break : strength_continue; + + /* The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because jump statements can't contain other + * statements. + */ } virtual void visit(class ir_return * ir) { + /* Eliminate all instructions after each one, since they are + * unreachable. This satisfies the DEAD_CODE_ELIMINATION + * postcondition. + */ truncate_after_instruction(ir); + + /* Set this->block.min_strength based on this instruction. This + * satisfies the ANALYSIS postcondition. It is not necessary to + * update this->block.may_clear_execute_flag or + * this->loop.may_set_return_flag, because an unlowered return + * instruction can't change any flags. + */ this->block.min_strength = strength_return; + + /* The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because jump statements can't contain other + * statements. + */ } virtual void visit(class ir_discard * ir) { + /* Nothing needs to be done. The ANALYSIS and + * DEAD_CODE_ELIMINATION postconditions are already satisfied, + * because discard statements are ignored by this optimization + * pass. The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because discard statements can't contain other + * statements. + */ } enum jump_strength get_jump_strength(ir_instruction* ir) @@ -304,18 +428,34 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { block_record block_records[2]; ir_jump* jumps[2]; + /* Recursively lower nested jumps. This satisfies the + * CONTAINED_JUMPS_LOWERED postcondition, except in the case of + * unconditional jumps at the end of ir->then_instructions and + * ir->else_instructions, which are handled below. + */ block_records[0] = visit_block(&ir->then_instructions); block_records[1] = visit_block(&ir->else_instructions); retry: /* we get here if we put code after the if inside a branch */ - for(unsigned i = 0; i < 2; ++i) { - exec_list& list = i ? ir->else_instructions : ir->then_instructions; - jumps[i] = 0; - if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail())) - jumps[i] = (ir_jump*)list.get_tail(); - } + /* Determine which of ir->then_instructions and + * ir->else_instructions end with an unconditional jump. + */ + for(unsigned i = 0; i < 2; ++i) { + exec_list& list = i ? ir->else_instructions : ir->then_instructions; + jumps[i] = 0; + if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail())) + jumps[i] = (ir_jump*)list.get_tail(); + } + + /* Loop until we have satisfied the CONTAINED_JUMPS_LOWERED + * postcondition by lowering jumps in both then_instructions and + * else_instructions. + */ for(;;) { + /* Determine the types of the jumps that terminate + * ir->then_instructions and ir->else_instructions. + */ jump_strength jump_strengths[2]; for(unsigned i = 0; i < 2; ++i) { @@ -326,7 +466,12 @@ retry: /* we get here if we put code after the if inside a branch */ jump_strengths[i] = strength_none; } - /* move both jumps out if possible */ + /* If both code paths end in a jump, and the jumps are the + * same, and we are pulling out jumps, replace them with a + * single jump that comes after the if instruction. The new + * jump will be visited next, and it will be lowered if + * necessary by the loop or conditional that encloses it. + */ if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) { bool unify = true; if(jump_strengths[0] == strength_continue) @@ -344,10 +489,19 @@ retry: /* we get here if we put code after the if inside a branch */ jumps[1]->remove(); this->progress = true; + /* Update jumps[] to reflect the fact that the jumps + * are gone, and update block_records[] to reflect the + * fact that control can now flow to the next + * instruction. + */ jumps[0] = 0; jumps[1] = 0; block_records[0].min_strength = strength_none; block_records[1].min_strength = strength_none; + + /* The CONTAINED_JUMPS_LOWERED postcondition is now + * satisfied, so we can break out of the loop. + */ break; } } @@ -367,9 +521,18 @@ retry: /* we get here if we put code after the if inside a branch */ else if(should_lower[1]) lower = 1; else + /* Neither code path ends in a jump that needs to be + * lowered, so the CONTAINED_JUMPS_LOWERED postcondition + * is satisfied and we can break out of the loop. + */ break; if(jump_strengths[lower] == strength_return) { + /* To lower a return, we create a return flag (if the + * function doesn't have one already) and add instructions + * that: 1. store the return value (if this function has a + * non-void return) and 2. set the return flag + */ ir_variable* return_flag = this->function.get_return_flag(); if(!this->function.signature->return_type->is_void()) { ir_variable* return_value = this->function.get_return_value(); @@ -378,29 +541,58 @@ retry: /* we get here if we put code after the if inside a branch */ jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_flag), new (ir) ir_constant(true), NULL)); this->loop.may_set_return_flag = true; if(this->loop.loop) { + /* If we are in a loop, replace the return instruction + * with a break instruction, and then loop so that the + * break instruction can be lowered if necessary. + */ ir_loop_jump* lowered = 0; lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + /* Note: we must update block_records and jumps to + * reflect the fact that the control path has been + * altered from a return to a break. + */ block_records[lower].min_strength = strength_break; jumps[lower]->replace_with(lowered); jumps[lower] = lowered; - } else + } else { + /* If we are not in a loop, we then proceed as we would + * for a continue statement (set the execute flag to + * false to prevent the rest of the function from + * executing). + */ goto lower_continue; + } this->progress = true; } else if(jump_strengths[lower] == strength_break) { - /* We can't lower to an actual continue because that would execute the increment. + /* To lower a break, we create a break flag (if the loop + * doesn't have one already) and add an instruction that + * sets it. * - * In the lowered code, we instead put the break check between the this->loop body and the increment, - * which is impossible with a real continue as defined by the GLSL IR currently. + * Then we proceed as we would for a continue statement + * (set the execute flag to false to prevent the rest of + * the loop body from executing). * - * Smarter options (such as undoing the increment) are possible but it's not worth implementing them, - * because if break is lowered, continue is almost surely lowered too. + * The visit() function for the loop will ensure that the + * break flag is checked after executing the loop body. */ jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(this->loop.get_break_flag()), new (ir) ir_constant(true), 0)); goto lower_continue; } else if(jump_strengths[lower] == strength_continue) { lower_continue: + /* To lower a continue, we create an execute flag (if the + * loop doesn't have one already) and replace the continue + * with an instruction that clears it. + * + * Note that this code path gets exercised when lowering + * return statements that are not inside a loop, so + * this->loop must be initialized even outside of loops. + */ ir_variable* execute_flag = this->loop.get_execute_flag(); jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0)); + /* Note: we must update block_records and jumps to reflect + * the fact that the control path has been altered to an + * instruction that clears the execute flag. + */ jumps[lower] = 0; block_records[lower].min_strength = strength_always_clears_execute_flag; block_records[lower].may_clear_execute_flag = true; @@ -411,6 +603,12 @@ lower_continue: /* move out a jump out if possible */ if(pull_out_jumps) { + /* If one of the branches ends in a jump, and control cannot + * fall out the bottom of the other branch, then we can move + * the jump after the if. + * + * Set move_out to the branch we are moving a jump out of. + */ int move_out = -1; if(jumps[0] && block_records[1].min_strength >= strength_continue) move_out = 0; @@ -421,22 +619,46 @@ lower_continue: { jumps[move_out]->remove(); ir->insert_after(jumps[move_out]); + /* Note: we must update block_records and jumps to reflect + * the fact that the jump has been moved out of the if. + */ jumps[move_out] = 0; block_records[move_out].min_strength = strength_none; this->progress = true; } } + /* Now satisfy the ANALYSIS postcondition by setting + * this->block.min_strength and + * this->block.may_clear_execute_flag based on the + * characteristics of the two branches. + */ if(block_records[0].min_strength < block_records[1].min_strength) this->block.min_strength = block_records[0].min_strength; else this->block.min_strength = block_records[1].min_strength; this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag; + /* Now we need to clean up the instructions that follow the + * if. + * + * If those instructions are unreachable, then satisfy the + * DEAD_CODE_ELIMINATION postcondition by eliminating them. + * Otherwise that postcondition is already satisfied. + */ if(this->block.min_strength) truncate_after_instruction(ir); else if(this->block.may_clear_execute_flag) { + /* If the "if" instruction might clear the execute flag, then + * we need to guard any instructions that follow so that they + * are only executed if the execute flag is set. + * + * If one of the branches of the "if" always clears the + * execute flag, and the other branch never clears it, then + * this is easy: just move all the instructions following the + * "if" into the branch that never clears it. + */ int move_into = -1; if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag) move_into = 1; @@ -451,14 +673,34 @@ lower_continue: if(!next->is_tail_sentinel()) { move_outer_block_inside(ir, list); + /* If any instructions moved, then we need to visit + * them (since they are now inside the "if"). Since + * block_records[move_into] is in its default state + * (see assertion above), we can safely replace + * block_records[move_into] with the result of this + * analysis. + */ exec_list list; list.head = next; block_records[move_into] = visit_block(&list); + /* + * Then we need to re-start our jump lowering, since one + * of the instructions we moved might be a jump that + * needs to be lowered. + */ this->progress = true; goto retry; } } else { + /* If we get here, then the simple case didn't apply; we + * need to actually guard the instructions that follow. + * + * To avoid creating unnecessarily-deep nesting, first + * look through the instructions that follow and unwrap + * any instructions that that are already wrapped in the + * appropriate guard. + */ ir_instruction* ir_after; for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();) { @@ -479,6 +721,9 @@ lower_continue: this->progress = true; } + /* Then, wrap all the instructions that follow in a single + * guard. + */ if(!ir->get_next()->is_tail_sentinel()) { assert(this->loop.execute_flag); ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag)); @@ -493,29 +738,87 @@ lower_continue: virtual void visit(ir_loop *ir) { + /* Visit the body of the loop, with a fresh data structure in + * this->loop so that the analysis we do here won't bleed into + * enclosing loops. + * + * We assume that all code after a loop is reachable from the + * loop (see comments on enum jump_strength), so the + * DEAD_CODE_ELIMINATION postcondition is automatically + * satisfied, as is the block.min_strength portion of the + * ANALYSIS postcondition. + * + * The block.may_clear_execute_flag portion of the ANALYSIS + * postcondition is automatically satisfied because execute + * flags do not propagate outside of loops. + * + * The loop.may_set_return_flag portion of the ANALYSIS + * postcondition is handled below. + */ ++this->function.nesting_depth; loop_record saved_loop = this->loop; this->loop = loop_record(this->function.signature, ir); + /* Recursively lower nested jumps. This satisfies the + * CONTAINED_JUMPS_LOWERED postcondition, except in the case of + * an unconditional continue or return at the bottom of the + * loop. + */ block_record body = visit_block(&ir->body_instructions); if(body.min_strength >= strength_break) { - /* FINISHME: turn the this->loop into an if, or replace it with its body */ + /* FINISHME: If the min_strength of the loop body is + * strength_break or strength_return, that means that it + * isn't a loop at all, since control flow always leaves the + * body of the loop via break or return. In principle the + * loop could be eliminated in this case. This optimization + * is not implemented yet. + */ } if(this->loop.break_flag) { + /* If a break flag was generated while visiting the body of + * the loop, then at least one break was lowered, so we need + * to generate an if statement at the end of the loop that + * does a "break" if the break flag is set. The break we + * generate won't violate the CONTAINED_JUMPS_LOWERED + * postcondition, because should_lower_jump() always returns + * false for a break that happens at the end of a loop. + */ ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag)); break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); ir->body_instructions.push_tail(break_if); } + /* If the body of the loop may set the return flag, then at + * least one return was lowered to a break, so we need to ensure + * that the return flag is checked after the body of the loop is + * executed. + */ if(this->loop.may_set_return_flag) { assert(this->function.return_flag); + /* Generate the if statement to check the return flag */ ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag)); + /* Note: we also need to propagate the knowledge that the + * return flag may get set to the outer context. This + * satisfies the loop.may_set_return_flag part of the + * ANALYSIS postcondition. + */ saved_loop.may_set_return_flag = true; if(saved_loop.loop) + /* If this loop is nested inside another one, then the if + * statement that we generated should break out of that + * loop if the return flag is set. Caller will lower that + * break statement if necessary. + */ return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); else + /* Otherwise, all we need to do is ensure that the + * instructions that follow are only executed if the + * return flag is clear. We can do that by moving those + * instructions into the else clause of the generated if + * statement. + */ move_outer_block_inside(ir, &return_if->else_instructions); ir->insert_after(return_if); } @@ -536,6 +839,11 @@ lower_continue: this->loop = loop_record(ir); assert(!this->loop.loop); + + /* Visit the body of the function to lower any jumps that occur + * in it, except possibly an unconditional return statement at + * the end of it. + */ visit_block(&ir->body); if(this->function.return_value) From dbaa2e627effbe1361e1a69c23cf247cf86f2709 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 11:59:32 -0700 Subject: [PATCH 011/110] glsl: Refactor logic for determining whether to lower return statements. Previously, do_lower_jumps.cpp determined whether to lower return statements in ir_lower_jumps_visitor::should_lower_jumps(). Moved this logic to ir_lower_jumps_visitor::visit(ir_function_signature *), so that it can be used in determining whether to lower a return statement at the end of a function. --- src/glsl/lower_jumps.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index da85c6b49c0..fa247c6c9b9 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -210,16 +210,17 @@ struct function_record ir_function_signature* signature; ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */ ir_variable* return_value; - bool is_main; + bool lower_return; unsigned nesting_depth; - function_record(ir_function_signature* p_signature = 0) + function_record(ir_function_signature* p_signature = 0, + bool lower_return = false) { this->signature = p_signature; this->return_flag = 0; this->return_value = 0; this->nesting_depth = 0; - this->is_main = this->signature && (strcmp(this->signature->function_name(), "main") == 0); + this->lower_return = lower_return; } ir_variable* get_return_flag() @@ -398,10 +399,8 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { /* never lower return at the end of a this->function */ if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel()) lower = false; - else if (this->function.is_main) - lower = lower_main_return; else - lower = lower_sub_return; + lower = this->function.lower_return; break; } return lower; @@ -833,9 +832,15 @@ lower_continue: assert(!this->function.signature); assert(!this->loop.loop); + bool lower_return; + if (strcmp(ir->function_name(), "main") == 0) + lower_return = lower_main_return; + else + lower_return = lower_sub_return; + function_record saved_function = this->function; loop_record saved_loop = this->loop; - this->function = function_record(ir); + this->function = function_record(ir, lower_return); this->loop = loop_record(ir); assert(!this->loop.loop); From afc9a50fba39df520046019c6993d7b7559329ea Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 12:14:07 -0700 Subject: [PATCH 012/110] glsl: Lower unconditional return statements. Previously, lower_jumps.cpp only lowered return statements that appeared inside of an if statement. Without this patch, lower_jumps.cpp might not lower certain return statements, causing some back-ends to fail (as in bug #36669). Fixes unit test test_lower_returns_1. --- src/glsl/lower_jumps.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index fa247c6c9b9..eceba09266e 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -851,6 +851,20 @@ lower_continue: */ visit_block(&ir->body); + /* If the body ended in an unconditional return of non-void, + * then we don't need to lower it because it's the one canonical + * return. + * + * If the body ended in a return of void, eliminate it because + * it is redundant. + */ + if (ir->return_type->is_void() && + get_jump_strength((ir_instruction *) ir->body.get_tail())) { + ir_jump *jump = (ir_jump *) ir->body.get_tail(); + assert (jump->ir_type == ir_type_return); + jump->remove(); + } + if(this->function.return_value) ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value))); From 03145ba655ad9173a74b853843eccaae78ff392f Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 18:26:05 -0700 Subject: [PATCH 013/110] glsl: lower unconditional returns and continues in loops. Previously, lower_jumps.cpp would only lower return and continue statements that appeared inside conditionals. This patch makes it lower unconditional returns and continue statements that occur inside a loop. Such unconditional flow control statements would be unlikely to be explicitly coded by a reasonable user, however they might arise as a result of other optimizations. Without this patch, lower_jumps.cpp might not lower certain return and continue statements, causing some backends to fail. Fixes unit tests test_lower_return_void_at_end_of_loop and test_remove_continue_at_end_of_loop. --- src/glsl/lower_jumps.cpp | 62 ++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index eceba09266e..cbdd8eaa8be 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -304,6 +304,43 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { } } + /** + * Insert the instructions necessary to lower a return statement, + * before the given return instruction. + */ + void insert_lowered_return(ir_return *ir) + { + ir_variable* return_flag = this->function.get_return_flag(); + if(!this->function.signature->return_type->is_void()) { + ir_variable* return_value = this->function.get_return_value(); + ir->insert_before( + new(ir) ir_assignment( + new (ir) ir_dereference_variable(return_value), + ir->value)); + } + ir->insert_before( + new(ir) ir_assignment( + new (ir) ir_dereference_variable(return_flag), + new (ir) ir_constant(true))); + this->loop.may_set_return_flag = true; + } + + /** + * If the given instruction is a return, lower it to instructions + * that store the return value (if there is one), set the return + * flag, and then break. + * + * It is safe to pass NULL to this function. + */ + void lower_return_unconditionally(ir_instruction *ir) + { + if (get_jump_strength(ir) != strength_return) { + return; + } + insert_lowered_return((ir_return*)ir); + ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); + } + virtual void visit(class ir_loop_jump * ir) { /* Eliminate all instructions after each one, since they are @@ -532,13 +569,7 @@ retry: /* we get here if we put code after the if inside a branch */ * that: 1. store the return value (if this function has a * non-void return) and 2. set the return flag */ - ir_variable* return_flag = this->function.get_return_flag(); - if(!this->function.signature->return_type->is_void()) { - ir_variable* return_value = this->function.get_return_value(); - jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_value), ((ir_return*)jumps[lower])->value, NULL)); - } - jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_flag), new (ir) ir_constant(true), NULL)); - this->loop.may_set_return_flag = true; + insert_lowered_return((ir_return*)jumps[lower]); if(this->loop.loop) { /* If we are in a loop, replace the return instruction * with a break instruction, and then loop so that the @@ -761,10 +792,25 @@ lower_continue: /* Recursively lower nested jumps. This satisfies the * CONTAINED_JUMPS_LOWERED postcondition, except in the case of * an unconditional continue or return at the bottom of the - * loop. + * loop, which are handled below. */ block_record body = visit_block(&ir->body_instructions); + /* If the loop ends in an unconditional continue, eliminate it + * because it is redundant. + */ + ir_instruction *ir_last + = (ir_instruction *) ir->body_instructions.get_tail(); + if (get_jump_strength(ir_last) == strength_continue) { + ir_last->remove(); + } + + /* If the loop ends in an unconditional return, and we are + * lowering returns, lower it. + */ + if (this->function.lower_return) + lower_return_unconditionally(ir_last); + if(body.min_strength >= strength_break) { /* FINISHME: If the min_strength of the loop body is * strength_break or strength_return, that means that it From 382cee91a4bbbee45897239e6802ccaa5a5ad9c3 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 12:36:23 -0700 Subject: [PATCH 014/110] glsl: Use foreach_list in lower_jumps.cpp The visitor class in lower_jumps.cpp never removes or replaces the instruction being visited, but it frequently alters or removes the instructions that follow it. Therefore, to make sure the altered IR is visited, it needs to iterate through exec_lists using foreach_list rather than visit_exec_list(). Without this patch, lower_jumps.cpp may require multiple passes in order to lower all jumps. This results in sub-optimal output because lower_jumps.cpp produces a brand new set of temporary variables each time it is run, and the redundant temporary variables are not guaranteed to be eliminated by later optimization passes. Also, certain invariants assumed by lower_jumps.cpp may fail to hold, causing assertion failures. Fixes unit tests test_lower_pulled_out_jump, test_lower_unified_returns, test_lower_guarded_conditional_break, test_lower_return_non_void_at_end_of_loop, and test_lower_returns_3. Reviewed-by: Kenneth Graunke --- src/glsl/lower_jumps.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index cbdd8eaa8be..199a0184fee 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -445,9 +445,20 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { block_record visit_block(exec_list* list) { + /* Note: since visiting a node may change that node's next + * pointer, we can't use visit_exec_list(), because + * visit_exec_list() caches the node's next pointer before + * visiting it. So we use foreach_list() instead. + * + * foreach_list() isn't safe if the node being visited gets + * removed, but fortunately this visitor doesn't do that. + */ + block_record saved_block = this->block; this->block = block_record(); - visit_exec_list(list, this); + foreach_list(node, list) { + ((ir_instruction *) node)->accept(this); + } block_record ret = this->block; this->block = saved_block; return ret; From e71b4ab8a64bf978b2036976a41e30996eebb0c8 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 15:33:36 -0700 Subject: [PATCH 015/110] glsl: In lower_jumps.cpp, lower both branches of a conditional. Previously, lower_jumps.cpp would break out of its loop after lowering a jump instruction in just the then- or else-branch of a conditional, and it would fail to lower a jump instruction occurring in the other branch. Without this patch, lower_jumps.cpp may require multiple passes in order to lower all jumps. This results in sub-optimal output because lower_jumps.cpp produces a brand new set of temporary variables each time it is run, and the redundant temporary variables are not guaranteed to be eliminated by later optimization passes. Fixes unit test test_lower_returns_4. --- src/glsl/lower_jumps.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index 199a0184fee..07897825b49 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -638,7 +638,10 @@ lower_continue: block_records[lower].min_strength = strength_always_clears_execute_flag; block_records[lower].may_clear_execute_flag = true; this->progress = true; - break; + + /* Let the loop run again, in case the other branch of the + * if needs to be lowered too. + */ } } From 067c9d7bd776260298ceabda026425ed7e4eb161 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 1 Jul 2011 17:29:35 -0700 Subject: [PATCH 016/110] glsl: Lower break instructions when necessary at the end of a loop. Normally lower_jumps.cpp doesn't need to lower a break instruction that occurs at the end of a loop, because all back-ends can produce proper GPU instructions for a break instruction in this "canonical" location. However, if other break instructions within the loop are already being lowered, then a break instruction at the end of the loop needs to be lowered too, since after the optimization is complete a new conditional break will be inserted at the end of the loop. Without this patch, lower_jumps.cpp may require multiple passes in order to lower all jumps. This results in sub-optimal output because lower_jumps.cpp produces a brand new set of temporary variables each time it is run, and the redundant temporary variables are not guaranteed to be eliminated by later optimization passes. Fixes unit test test_lower_breaks_6. --- src/glsl/lower_jumps.cpp | 55 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index 07897825b49..61874990a94 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -341,6 +341,50 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); } + /** + * Create the necessary instruction to replace a break instruction. + */ + ir_instruction *create_lowered_break() + { + void *ctx = this->function.signature; + return new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(this->loop.get_break_flag()), + new(ctx) ir_constant(true), + 0); + } + + /** + * If the given instruction is a break, lower it to an instruction + * that sets the break flag, without consulting + * should_lower_jump(). + * + * It is safe to pass NULL to this function. + */ + void lower_break_unconditionally(ir_instruction *ir) + { + if (get_jump_strength(ir) != strength_break) { + return; + } + ir->replace_with(create_lowered_break()); + } + + /** + * If the block ends in a conditional or unconditional break, lower + * it, even though should_lower_jump() says it needn't be lowered. + */ + void lower_final_breaks(exec_list *block) + { + ir_instruction *ir = (ir_instruction *) block->get_tail(); + lower_break_unconditionally(ir); + ir_if *ir_if = ir->as_if(); + if (ir_if) { + lower_break_unconditionally( + (ir_instruction *) ir_if->then_instructions.get_tail()); + lower_break_unconditionally( + (ir_instruction *) ir_if->else_instructions.get_tail()); + } + } + virtual void visit(class ir_loop_jump * ir) { /* Eliminate all instructions after each one, since they are @@ -616,7 +660,7 @@ retry: /* we get here if we put code after the if inside a branch */ * The visit() function for the loop will ensure that the * break flag is checked after executing the loop body. */ - jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(this->loop.get_break_flag()), new (ir) ir_constant(true), 0)); + jumps[lower]->insert_before(create_lowered_break()); goto lower_continue; } else if(jump_strengths[lower] == strength_continue) { lower_continue: @@ -836,6 +880,9 @@ lower_continue: } if(this->loop.break_flag) { + /* We only get here if we are lowering breaks */ + assert (lower_break); + /* If a break flag was generated while visiting the body of * the loop, then at least one break was lowered, so we need * to generate an if statement at the end of the loop that @@ -843,7 +890,13 @@ lower_continue: * generate won't violate the CONTAINED_JUMPS_LOWERED * postcondition, because should_lower_jump() always returns * false for a break that happens at the end of a loop. + * + * However, if the loop already ends in a conditional or + * unconditional break, then we need to lower that break, + * because it won't be at the end of the loop anymore. */ + lower_final_breaks(&ir->body_instructions); + ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag)); break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); ir->body_instructions.push_tail(break_if); From 1e39fc784bc3d0d5ad01d9c147529ac0e10f1262 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 5 May 2011 13:09:16 -0700 Subject: [PATCH 017/110] DRI2/GLX: use new swap event types Use the new swap event type so we get valid SBC values. Reviewed-by: Ian Romanick Reviewed-by: Jeremy Huddleston Signed-off-by: Jesse Barnes --- configure.ac | 4 ++-- src/glx/dri2.c | 4 ++-- src/glx/glxext.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index f19f6478b6a..dde13c9b57d 100644 --- a/configure.ac +++ b/configure.ac @@ -22,8 +22,8 @@ LIBDRM_REQUIRED=2.4.24 LIBDRM_RADEON_REQUIRED=2.4.24 LIBDRM_INTEL_REQUIRED=2.4.24 LIBDRM_NOUVEAU_REQUIRED=0.6 -DRI2PROTO_REQUIRED=2.1 -GLPROTO_REQUIRED=1.4.11 +DRI2PROTO_REQUIRED=2.6 +GLPROTO_REQUIRED=1.4.14 LIBDRM_XORG_REQUIRED=2.4.24 LIBKMS_XORG_REQUIRED=1.0.0 diff --git a/src/glx/dri2.c b/src/glx/dri2.c index adfd3d1f7c8..8654a37688f 100644 --- a/src/glx/dri2.c +++ b/src/glx/dri2.c @@ -97,7 +97,7 @@ DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) case DRI2_BufferSwapComplete: { GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event; - xDRI2BufferSwapComplete *awire = (xDRI2BufferSwapComplete *)wire; + xDRI2BufferSwapComplete2 *awire = (xDRI2BufferSwapComplete2 *)wire; /* Ignore swap events if we're not looking for them */ aevent->type = dri2GetSwapEventType(dpy, awire->drawable); @@ -124,7 +124,7 @@ DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) } aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - aevent->sbc = ((CARD64)awire->sbc_hi << 32) | awire->sbc_lo; + aevent->sbc = awire->sbc; return True; } #endif diff --git a/src/glx/glxext.c b/src/glx/glxext.c index 73c332793a0..40a06a8612b 100644 --- a/src/glx/glxext.c +++ b/src/glx/glxext.c @@ -133,12 +133,12 @@ __glXWireToEvent(Display *dpy, XEvent *event, xEvent *wire) case GLX_BufferSwapComplete: { GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event; - xGLXBufferSwapComplete *awire = (xGLXBufferSwapComplete *)wire; + xGLXBufferSwapComplete2 *awire = (xGLXBufferSwapComplete2 *)wire; aevent->event_type = awire->event_type; aevent->drawable = awire->drawable; aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - aevent->sbc = ((CARD64)awire->sbc_hi << 32) | awire->sbc_lo; + aevent->sbc = awire->sbc; return True; } default: From 4df137691ee29bb812347fa2c5f19095243ede22 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 6 May 2011 10:31:24 -0700 Subject: [PATCH 018/110] GLX/DRI2: handle swap event swap count wrapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a new GLX drawable struct to track client related info, and add a wrap counter to it drawable and track it as we receive events. This allows us to support the full 64 bits of the event structure we pass to the client even though the server only gives us a 32 bit count. Reviewed-by: Michel Dänzer Reviewed-by: Jeremy Huddleston Signed-off-by: Jesse Barnes --- src/glx/dri2.c | 12 ++++++++- src/glx/glx_pbuffer.c | 11 +++++++++ src/glx/glxclient.h | 16 ++++++++++++ src/glx/glxcmds.c | 57 +++++++++++++++++++++++++++++++++++++++++++ src/glx/glxext.c | 14 ++++++++++- 5 files changed, 108 insertions(+), 2 deletions(-) diff --git a/src/glx/dri2.c b/src/glx/dri2.c index 8654a37688f..229840d6919 100644 --- a/src/glx/dri2.c +++ b/src/glx/dri2.c @@ -88,6 +88,7 @@ static Bool DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); + struct glx_drawable *glxDraw; XextCheckExtension(dpy, info, dri2ExtensionName, False); @@ -98,6 +99,9 @@ DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) { GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event; xDRI2BufferSwapComplete2 *awire = (xDRI2BufferSwapComplete2 *)wire; + __GLXDRIdrawable *pdraw; + + pdraw = dri2GetGlxDrawableFromXDrawableId(dpy, awire->drawable); /* Ignore swap events if we're not looking for them */ aevent->type = dri2GetSwapEventType(dpy, awire->drawable); @@ -124,7 +128,13 @@ DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) } aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - aevent->sbc = awire->sbc; + + glxDraw = GetGLXDrawable(dpy, pdraw->drawable); + if (awire->sbc < glxDraw->lastEventSbc) + glxDraw->eventSbcWrap += 0x100000000; + glxDraw->lastEventSbc = awire->sbc; + aevent->sbc = awire->sbc + glxDraw->eventSbcWrap; + return True; } #endif diff --git a/src/glx/glx_pbuffer.c b/src/glx/glx_pbuffer.c index 0e74e7ccd0e..6738252a31d 100644 --- a/src/glx/glx_pbuffer.c +++ b/src/glx/glx_pbuffer.c @@ -396,6 +396,7 @@ CreateDrawable(Display *dpy, struct glx_config *config, Drawable drawable, const int *attrib_list, CARD8 glxCode) { xGLXCreateWindowReq *req; + struct glx_drawable *glxDraw; CARD32 *data; unsigned int i; CARD8 opcode; @@ -411,6 +412,10 @@ CreateDrawable(Display *dpy, struct glx_config *config, if (!opcode) return None; + glxDraw = Xmalloc(sizeof(*glxDraw)); + if (!glxDraw) + return None; + LockDisplay(dpy); GetReqExtra(GLXCreateWindow, 8 * i, req); data = (CARD32 *) (req + 1); @@ -429,6 +434,11 @@ CreateDrawable(Display *dpy, struct glx_config *config, UnlockDisplay(dpy); SyncHandle(); + if (InitGLXDrawable(dpy, glxDraw, drawable, xid)) { + free(glxDraw); + return None; + } + if (!CreateDRIDrawable(dpy, config, drawable, xid, attrib_list, i)) { if (glxCode == X_GLXCreatePixmap) glxCode = X_GLXDestroyPixmap; @@ -454,6 +464,7 @@ DestroyDrawable(Display * dpy, GLXDrawable drawable, CARD32 glxCode) protocolDestroyDrawable(dpy, drawable, glxCode); + DestroyGLXDrawable(dpy, drawable); DestroyDRIDrawable(dpy, drawable, GL_FALSE); return; diff --git a/src/glx/glxclient.h b/src/glx/glxclient.h index 06415288165..f9154266101 100644 --- a/src/glx/glxclient.h +++ b/src/glx/glxclient.h @@ -567,6 +567,8 @@ struct glx_display */ struct glx_screen **screens; + __glxHashTable *glXDrawHash; + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) __glxHashTable *drawHash; @@ -579,6 +581,14 @@ struct glx_display #endif }; +struct glx_drawable { + XID xDrawable; + XID drawable; + + uint32_t lastEventSbc; + int64_t eventSbcWrap; +}; + extern int glx_screen_init(struct glx_screen *psc, int screen, struct glx_display * priv); @@ -784,6 +794,12 @@ extern int applegl_create_display(struct glx_display *display); #endif + +extern struct glx_drawable *GetGLXDrawable(Display *dpy, GLXDrawable drawable); +extern int InitGLXDrawable(Display *dpy, struct glx_drawable *glxDraw, + XID xDrawable, GLXDrawable drawable); +extern void DestroyGLXDrawable(Display *dpy, GLXDrawable drawable); + extern struct glx_context dummyContext; extern struct glx_screen * diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index 191b321ce32..fc0a07901a7 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -90,6 +90,51 @@ GetGLXDRIDrawable(Display * dpy, GLXDrawable drawable) #endif +_X_HIDDEN struct glx_drawable * +GetGLXDrawable(Display *dpy, GLXDrawable drawable) +{ + struct glx_display *priv = __glXInitialize(dpy); + struct glx_drawable *glxDraw; + + if (priv == NULL) + return NULL; + + if (__glxHashLookup(priv->glXDrawHash, drawable, (void *) &glxDraw) == 0) + return glxDraw; + + return NULL; +} + +_X_HIDDEN int +InitGLXDrawable(Display *dpy, struct glx_drawable *glxDraw, XID xDrawable, + GLXDrawable drawable) +{ + struct glx_display *priv = __glXInitialize(dpy); + + if (!priv) + return -1; + + glxDraw->xDrawable = xDrawable; + glxDraw->drawable = drawable; + glxDraw->lastEventSbc = 0; + glxDraw->eventSbcWrap = 0; + + return __glxHashInsert(priv->glXDrawHash, drawable, glxDraw); +} + +_X_HIDDEN void +DestroyGLXDrawable(Display *dpy, GLXDrawable drawable) +{ + struct glx_display *priv = __glXInitialize(dpy); + struct glx_drawable *glxDraw; + + if (!priv) + return; + + glxDraw = GetGLXDrawable(dpy, drawable); + __glxHashDelete(priv->glXDrawHash, drawable); + free(glxDraw); +} /** * Get the GLX per-screen data structure associated with a GLX context. @@ -608,6 +653,7 @@ glXCreateGLXPixmap(Display * dpy, XVisualInfo * vis, Pixmap pixmap) return pixmap; #else xGLXCreateGLXPixmapReq *req; + struct glx_drawable *glxDraw; GLXPixmap xid; CARD8 opcode; @@ -616,6 +662,10 @@ glXCreateGLXPixmap(Display * dpy, XVisualInfo * vis, Pixmap pixmap) return None; } + glxDraw = Xmalloc(sizeof(*glxDraw)); + if (!glxDraw) + return None; + /* Send the glXCreateGLXPixmap request */ LockDisplay(dpy); GetReq(GLXCreateGLXPixmap, req); @@ -628,6 +678,11 @@ glXCreateGLXPixmap(Display * dpy, XVisualInfo * vis, Pixmap pixmap) UnlockDisplay(dpy); SyncHandle(); + if (InitGLXDrawable(dpy, glxDraw, pixmap, req->glxpixmap)) { + free(glxDraw); + return None; + } + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) do { /* FIXME: Maybe delay __DRIdrawable creation until the drawable @@ -700,6 +755,8 @@ glXDestroyGLXPixmap(Display * dpy, GLXPixmap glxpixmap) UnlockDisplay(dpy); SyncHandle(); + DestroyGLXDrawable(dpy, glxpixmap); + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) { struct glx_display *const priv = __glXInitialize(dpy); diff --git a/src/glx/glxext.c b/src/glx/glxext.c index 40a06a8612b..8704c484f96 100644 --- a/src/glx/glxext.c +++ b/src/glx/glxext.c @@ -134,11 +134,19 @@ __glXWireToEvent(Display *dpy, XEvent *event, xEvent *wire) { GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event; xGLXBufferSwapComplete2 *awire = (xGLXBufferSwapComplete2 *)wire; + struct glx_drawable *glxDraw = GetGLXDrawable(dpy, awire->drawable); aevent->event_type = awire->event_type; aevent->drawable = awire->drawable; aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - aevent->sbc = awire->sbc; + + if (!glxDraw) + return False; + + if (awire->sbc < glxDraw->lastEventSbc) + glxDraw->eventSbcWrap += 0x100000000; + glxDraw->lastEventSbc = awire->sbc; + aevent->sbc = awire->sbc + glxDraw->eventSbcWrap; return True; } default: @@ -227,6 +235,8 @@ glx_display_free(struct glx_display *priv) if (priv->serverGLXversion) Xfree((char *) priv->serverGLXversion); + __glxHashDestroy(priv->glXDrawHash); + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) __glxHashDestroy(priv->drawHash); @@ -847,6 +857,8 @@ __glXInitialize(Display * dpy) XESetCloseDisplay(dpy, dpyPriv->codes->extension, __glXCloseDisplay); XESetErrorString (dpy, dpyPriv->codes->extension,__glXErrorString); + dpyPriv->glXDrawHash = __glxHashCreate(); + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) glx_direct = (getenv("LIBGL_ALWAYS_INDIRECT") == NULL); glx_accel = (getenv("LIBGL_ALWAYS_SOFTWARE") == NULL); From 86f8b4117f35c788c8a043c2e241eb19eaacae8c Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Wed, 6 Jul 2011 05:29:08 +0400 Subject: [PATCH 019/110] r600g: LIT: swap MUL_LIT operands to fix 0^0 For 0^0 case result of "LOG_CLAMPED ...,0" is -MAX_FLOAT, and then result of "MUL_LIT ...,0,-MAX_FLOAT,..." is -MAX_FLOAT instead of 0 because of special src1 checks for -MAX_FLOAT. So swap src0/1: "MUL_LIT ...,-MAX_FLOAT,0,..." to get expected 0, then result of "EXP_IEEE ...,0" is 1 as expected for LIT. Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/r600_shader.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f83d7079b29..f3cbf9807fd 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1388,7 +1388,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) return r; } } else { - /* dst.z = log(src.y) */ + /* tmp.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); r600_bc_src(&alu.src[0], &ctx->src[0], 1); @@ -1404,13 +1404,12 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) chan = alu.dst.chan; sel = alu.dst.sel; - /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ + /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); - alu.src[1].sel = sel; - alu.src[1].chan = chan; - + alu.src[0].sel = sel; + alu.src[0].chan = chan; + r600_bc_src(&alu.src[1], &ctx->src[0], 3); r600_bc_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; From b693787fdf82d065c548e80944aad14e9ba64def Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Wed, 6 Jul 2011 05:29:09 +0400 Subject: [PATCH 020/110] r600g: RSQ: clear NEG for operand Need to clear NEG bit because it applies after ABS, e.g. "RSQ ..., -1" uses -|1| as operand. Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/r600_shader.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f3cbf9807fd..5aad3e359bc 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1506,6 +1506,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { r600_bc_src(&alu.src[i], &ctx->src[i], 0); alu.src[i].abs = 1; + alu.src[i].neg = 0; } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; From f87d566f4b0e4df18ab60d64951013629bdd624c Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Fri, 8 Jul 2011 06:19:36 +0400 Subject: [PATCH 021/110] r600g: introduce r600_bc_src_set_abs helper and fix LOG LOG instruction should use absolute values of source operand. Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/r600_shader.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 5aad3e359bc..6dae6926360 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -939,6 +939,12 @@ static void r600_bc_src(struct r600_bc_alu_src *bc_src, bc_src->value = shader_src->value[bc_src->chan]; } +static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src) +{ + bc_src->abs = 1; + bc_src->neg = 0; +} + static void tgsi_dst(struct r600_shader_ctx *ctx, const struct tgsi_full_dst_register *tgsi_dst, unsigned swizzle, @@ -998,9 +1004,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) alu.src[1].neg = 1; break; case TGSI_OPCODE_ABS: - alu.src[0].abs = 1; - if (alu.src[0].neg) - alu.src[0].neg = 0; + r600_bc_src_set_abs(&alu.src[0]); break; default: break; @@ -1505,8 +1509,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { r600_bc_src(&alu.src[i], &ctx->src[i], 0); - alu.src[i].abs = 1; - alu.src[i].neg = 0; + r600_bc_src_set_abs(&alu.src[i]); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2489,7 +2492,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) int r; int i; - /* result.x = floor(log2(src)); */ + /* result.x = floor(log2(|src|)); */ if (inst->Dst[0].Register.WriteMask & 1) { if (ctx->bc->chiprev == CHIPREV_CAYMAN) { for (i = 0; i < 3; i++) { @@ -2497,6 +2500,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2514,6 +2518,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2538,7 +2543,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - /* result.y = src.x / (2 ^ floor(log2(src.x))); */ + /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { if (ctx->bc->chiprev == CHIPREV_CAYMAN) { @@ -2547,6 +2552,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2564,6 +2570,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -2663,6 +2670,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2677,7 +2685,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - /* result.z = log2(src);*/ + /* result.z = log2(|src|);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { if (ctx->bc->chiprev == CHIPREV_CAYMAN) { for (i = 0; i < 3; i++) { @@ -2685,6 +2693,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; if (i == 2) @@ -2702,6 +2711,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; From 3efb47f0b0557a6b96a7e41b27725cea4736a061 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Fri, 8 Jul 2011 06:19:37 +0400 Subject: [PATCH 022/110] r600g: introduce r600_bc_src_toggle_neg helper and fix SUB & LRP SUB & LRP instructions should toggle NEG bit instead of setting it, otherwise e.g. "SUB a,b,-1" is translated as "ADD a,b,-1" Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/r600_shader.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6dae6926360..6bb5ceb5450 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -945,6 +945,11 @@ static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src) bc_src->neg = 0; } +static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) +{ + bc_src->neg = !bc_src->neg; +} + static void tgsi_dst(struct r600_shader_ctx *ctx, const struct tgsi_full_dst_register *tgsi_dst, unsigned swizzle, @@ -1001,7 +1006,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_SUB: - alu.src[1].neg = 1; + r600_bc_src_toggle_neg(&alu.src[1]); break; case TGSI_OPCODE_ABS: r600_bc_src_set_abs(&alu.src[0]); @@ -2195,7 +2200,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; r600_bc_src(&alu.src[1], &ctx->src[0], i); - alu.src[1].neg = 1; + r600_bc_src_toggle_neg(&alu.src[1]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { From 57b57f6d1c3689a3a44222cb169bfd3e3142a68d Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 8 Jul 2011 20:17:50 -0700 Subject: [PATCH 023/110] i965/gen7: Remove gratuitous dirty flags from WM and PS state. Commit b46dc45ceef3deb17ba2b0b4300eeb93e9cf7833 claimed that NEW_POLYGONSTIPPLE is gratuitous, but somehow just changed comments and whitespace instead of actually removing the flag. While we're at it, 3DSTATE_PS doesn't appear to need NEW_LINE or NEW_POLYGON either (those are in 3DSTATE_WM). Also, 3DSTATE_WM doesn't appear to need BRW_NEW_NR_WM_SURFACES or BRW_NEW_CURBE_OFFSETS either (those are in 3DSTATE_PS). NOTE: This is a candidate for the 7.11 branch. Reviewed-by: Eric Anholt Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen7_wm_state.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 17f75354f1d..0f5b06cbf48 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -138,11 +138,9 @@ upload_wm_state(struct brw_context *brw) const struct brw_tracked_state gen7_wm_state = { .dirty = { - .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_POLYGONSTIPPLE | + .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_COLOR | _NEW_BUFFERS), - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_NR_WM_SURFACES | + .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), .cache = 0, @@ -240,10 +238,7 @@ upload_ps_state(struct brw_context *brw) const struct brw_tracked_state gen7_ps_state = { .dirty = { - .mesa = (_NEW_LINE | - _NEW_POLYGON | - _NEW_POLYGONSTIPPLE | - _NEW_PROGRAM_CONSTANTS), + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | From d03fdc4cdefdfdc5b59547945704c6037a5061c7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 8 Jul 2011 15:30:48 -0700 Subject: [PATCH 024/110] i965/gen4: Fix GPU hangs since the program streaming change. This was tricky. We were doing a use-before-initialize of grf_reg_count, but the value usually got overwritten anyway -- when we didn't have to do a relocation (typical), or on gen5 when we didn't have relocations at all. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38771 Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_vs_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index d5010a21e80..179ca199b45 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -47,6 +47,7 @@ brw_prepare_vs_unit(struct brw_context *brw) memset(vs, 0, sizeof(*vs)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */ + vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs->thread0.kernel_start_pointer = brw_program_reloc(brw, brw->vs.state_offset + @@ -54,7 +55,6 @@ brw_prepare_vs_unit(struct brw_context *brw) brw->vs.prog_offset + (vs->thread0.grf_reg_count << 1)) >> 6; - vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, * which will have the channel mask for dwords 4-7 enabled in the thread, From 949896b82f19f72333e7f6c132bd55e023f0170f Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:18:59 +0200 Subject: [PATCH 025/110] r600g: Fix the type of the family field in r600_pipe_context. Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/r600_pipe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index c58c2f77743..b51fa24dfb2 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -175,7 +175,7 @@ struct r600_pipe_fences { struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; - unsigned family; + enum radeon_family family; void *custom_dsa_flush; struct r600_screen *screen; struct radeon *radeon; From b3b946b0ab88c1d7edeab183d8ad5125ba223392 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:18:59 +0200 Subject: [PATCH 026/110] r600g: Store the chip class in r600_pipe_context. Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/evergreen_state.c | 20 +++------- src/gallium/drivers/r600/r600_asm.c | 4 +- src/gallium/drivers/r600/r600_pipe.c | 42 +++++--------------- src/gallium/drivers/r600/r600_pipe.h | 1 + src/gallium/drivers/r600/r600_state.c | 13 +++--- src/gallium/drivers/r600/r600_state_common.c | 26 ++++++------ 6 files changed, 36 insertions(+), 70 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 887f52e67db..97f10ce77b9 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -657,13 +657,11 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, u32 color_control, target_mask; /* FIXME there is more then 8 framebuffer */ unsigned blend_cntl[8]; - enum radeon_family family; if (blend == NULL) { return NULL; } - family = r600_get_family(rctx->radeon); rstate = &blend->rstate; rstate->id = R600_PIPE_STATE_BLEND; @@ -690,7 +688,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, color_control, 0xFFFFFFFD, NULL); - if (family != CHIP_CAYMAN) + if (rctx->chip_class != CAYMAN) r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); else { r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL); @@ -827,9 +825,6 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; unsigned clip_rule; - enum radeon_family family; - - family = r600_get_family(rctx->radeon); if (rs == NULL) { return NULL; @@ -888,7 +883,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, tmp = (unsigned)state->line_width * 8; r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), @@ -1447,14 +1442,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); u32 shader_mask, tl, br, target_mask; - enum radeon_family family; int tl_x, tl_y, br_x, br_y; if (rstate == NULL) return; - family = r600_get_family(rctx->radeon); - evergreen_context_flush_dest_caches(&rctx->ctx); rctx->ctx.num_dest_buffers = state->nr_cbufs; @@ -1491,7 +1483,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (br_y == 0) tl_y = 1; /* cayman hw workaround */ - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { if (br_x == 1 && br_y == 1) br_x = 2; } @@ -1535,7 +1527,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, shader_mask, 0xFFFFFFFF, NULL); - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG, 0x00000000, 0xFFFFFFFF, NULL); } else { @@ -1722,9 +1714,9 @@ void evergreen_init_config(struct r600_pipe_context *rctx) enum radeon_family family; unsigned tmp; - family = r600_get_family(rctx->radeon); + family = rctx->family; - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { cayman_init_config(rctx); return; } diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 065f955ebcb..cd4984e389a 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2162,7 +2162,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru struct r600_bc_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; - unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; + unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160; unsigned format, num_format, format_comp, endian; u32 *bytecode; int i, r; @@ -2287,7 +2287,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru r600_bo_unmap(rctx->radeon, ve->fetch_shader); r600_bc_clear(&bc); - if (rctx->family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) evergreen_fetch_shader(&rctx->context, ve); else r600_fetch_shader(&rctx->context, ve); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index d512268f63f..a3df4f571a0 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -194,7 +194,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void { struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context); struct r600_screen* rscreen = (struct r600_screen *)screen; - enum chip_class class; if (rctx == NULL) return NULL; @@ -211,6 +210,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->screen = rscreen; rctx->radeon = rscreen->radeon; rctx->family = r600_get_family(rctx->radeon); + rctx->chip_class = r600_get_family_class(rctx->radeon); rctx->fences.bo = NULL; rctx->fences.data = NULL; @@ -224,47 +224,29 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_init_surface_functions(rctx); rctx->context.draw_vbo = r600_draw_vbo; - switch (r600_get_family(rctx->radeon)) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: + switch (rctx->chip_class) { + case R600: + case R700: r600_init_state_functions(rctx); if (r600_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); return NULL; } r600_init_config(rctx); + rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_BARTS: - case CHIP_TURKS: - case CHIP_CAICOS: - case CHIP_CAYMAN: + case EVERGREEN: + case CAYMAN: evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); return NULL; } evergreen_init_config(rctx); + rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); break; default: - R600_ERR("unsupported family %d\n", r600_get_family(rctx->radeon)); + R600_ERR("Unsupported chip class %d.\n", rctx->chip_class); r600_destroy_context(&rctx->context); return NULL; } @@ -289,12 +271,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - class = r600_get_family_class(rctx->radeon); - if (class == R600 || class == R700) - rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); - else - rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); - return &rctx->context; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index b51fa24dfb2..6f399ed43b0 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -176,6 +176,7 @@ struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; enum radeon_family family; + enum chip_class chip_class; void *custom_dsa_flush; struct r600_screen *screen; struct radeon *radeon; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3085cd9a87a..8a684e63c01 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1399,7 +1399,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta /* EXPORT_NORM is an optimzation that can be enabled for better * performance in certain cases */ - if (rctx->family < CHIP_RV770) { + if (rctx->chip_class == R600) { /* EXPORT_NORM can be enabled if: * - 11-bit or smaller UNORM/SNORM/SRGB * - BLEND_CLAMP is enabled @@ -1559,7 +1559,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, 0xFFFFFFFF, NULL); - if (rctx->family >= CHIP_RV770) { + if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, 0xFFFFFFFF, NULL); @@ -1653,16 +1653,13 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) void r600_adjust_gprs(struct r600_pipe_context *rctx) { - enum radeon_family family; struct r600_pipe_state rstate; unsigned num_ps_gprs = rctx->default_ps_gprs; unsigned num_vs_gprs = rctx->default_vs_gprs; unsigned tmp; int diff; - family = r600_get_family(rctx->radeon); - - if (family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) return; if (!rctx->ps_shader && !rctx->vs_shader) @@ -1714,7 +1711,7 @@ void r600_init_config(struct r600_pipe_context *rctx) struct r600_pipe_state *rstate = &rctx->config; u32 tmp; - family = r600_get_family(rctx->radeon); + family = rctx->family; ps_prio = 0; vs_prio = 1; gs_prio = 2; @@ -1895,7 +1892,7 @@ void r600_init_config(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); - if (family >= CHIP_RV770) { + if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, S_009508_DISABLE_CUBE_ANISO(1) | diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index d9140403e5a..408eaed491b 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -109,7 +109,7 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) rctx->states[rs->rstate.id] = &rs->rstate; r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_polygon_offset_update(rctx); } else { r600_polygon_offset_update(rctx); @@ -212,7 +212,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, /* Zero states. */ for (i = 0; i < count; i++) { if (!buffers[i].buffer) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } else { r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); @@ -220,7 +220,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, } } for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } else { r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); @@ -367,7 +367,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) for (i = 0; i < rshader->ninput; i++) { if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || rshader->input[i].name == TGSI_SEMANTIC_FACE) - if (rctx->family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) continue; else sid=0; @@ -387,7 +387,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) tmp |= S_028644_PT_SPRITE_TEX(1); } - if (rctx->family < CHIP_CEDAR) { + if (rctx->chip_class < EVERGREEN) { if (rshader->input[i].centroid) tmp |= S_028644_SEL_CENTROID(1); @@ -434,14 +434,14 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->vs_const_buffer_resource[index]; if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } else { @@ -462,13 +462,13 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->ps_const_buffer_resource[index]; if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } else { @@ -521,14 +521,14 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { @@ -600,7 +600,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_shader_rebuild(ctx, rctx->vs_shader); if ((rctx->ps_shader->shader.clamp_color != rctx->clamp_fragment_color) || - ((rctx->family >= CHIP_CEDAR) && rctx->ps_shader->shader.fs_write_all && + ((rctx->chip_class >= EVERGREEN) && rctx->ps_shader->shader.fs_write_all && (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) r600_shader_rebuild(ctx, rctx->ps_shader); @@ -655,7 +655,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) rdraw.indices_bo_offset = draw.index_buffer_offset; } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_draw(&rctx->ctx, &rdraw); } else { r600_context_draw(&rctx->ctx, &rdraw); From 4f7dfd8ad3185f006e7ae8ed86bafd4d66ebc903 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:18:59 +0200 Subject: [PATCH 027/110] r600g: Get rid of the superfluous family field from r600_shader. Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/r600_shader.c | 9 ++++----- src/gallium/drivers/r600/r600_shader.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6bb5ceb5450..91649e0c058 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -99,14 +99,14 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s /* build state */ switch (rshader->processor_type) { case TGSI_PROCESSOR_VERTEX: - if (rshader->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_shader_vs(ctx, shader); } else { r600_pipe_shader_vs(ctx, shader); } break; case TGSI_PROCESSOR_FRAGMENT: - if (rshader->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_shader_ps(ctx, shader); } else { r600_pipe_shader_ps(ctx, shader); @@ -135,7 +135,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s fprintf(stderr, "--------------------------------------------------------------\n"); tgsi_dump(shader->tokens, 0); } - shader->shader.family = r600_get_family(rctx->radeon); r = r600_shader_from_tgsi(rctx, shader); if (r) { R600_ERR("translation from TGSI failed !\n"); @@ -610,7 +609,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi ctx.bc = &shader->bc; ctx.shader = shader; - r = r600_bc_init(ctx.bc, shader->family); + r = r600_bc_init(ctx.bc, rctx->family); if (r) return r; ctx.tokens = tokens; @@ -802,7 +801,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { output[i + j].array_base = shader->output[i].sid; output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - if (shader->fs_write_all && (shader->family >= CHIP_CEDAR)) { + if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { for (j = 1; j < shader->nr_cbufs; j++) { memset(&output[i + j], 0, sizeof(struct r600_bc_output)); output[i + j].gpr = shader->output[i].gpr; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 76aebf2b1ea..3ba84bd8907 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -43,7 +43,6 @@ struct r600_shader { unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; - enum radeon_family family; boolean uses_kill; boolean fs_write_all; boolean clamp_color; From 89dc31a28d8e5607989ec11cfd29310c1c97f6ac Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:18:59 +0200 Subject: [PATCH 028/110] r600g: Replace the CHIPREV_* defines with the chip_class enum. Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/eg_asm.c | 2 +- src/gallium/drivers/r600/r600_asm.c | 136 ++++++++++++------------ src/gallium/drivers/r600/r600_asm.h | 2 +- src/gallium/drivers/r600/r600_opcodes.h | 10 +- src/gallium/drivers/r600/r600_shader.c | 42 ++++---- 5 files changed, 93 insertions(+), 99 deletions(-) diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index fb0b0f104bf..c95872b0809 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -69,7 +69,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst); - if (bc->chiprev == CHIPREV_EVERGREEN) /* no EOP on cayman */ + if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); id++; diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index cd4984e389a..471fc65e7a5 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -41,9 +41,9 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r if(alu->is_op3) return 3; - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: switch (alu->inst) { case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; @@ -93,8 +93,8 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r "Need instruction operand number for 0x%x.\n", alu->inst); } break; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: switch (alu->inst) { case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; @@ -208,13 +208,13 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_RV635: case CHIP_RS780: case CHIP_RS880: - bc->chiprev = CHIPREV_R600; + bc->chip_class = R600; break; case CHIP_RV770: case CHIP_RV730: case CHIP_RV710: case CHIP_RV740: - bc->chiprev = CHIPREV_R700; + bc->chip_class = R700; break; case CHIP_CEDAR: case CHIP_REDWOOD: @@ -227,10 +227,10 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: - bc->chiprev = CHIPREV_EVERGREEN; + bc->chip_class = EVERGREEN; break; case CHIP_CAYMAN: - bc->chiprev = CHIPREV_CAYMAN; + bc->chip_class = CAYMAN; break; default: R600_ERR("unknown family %d\n", bc->family); @@ -301,9 +301,9 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) /* alu instructions that can ony exits once per group */ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || @@ -339,8 +339,8 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || @@ -382,16 +382,16 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || @@ -403,13 +403,13 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; @@ -418,15 +418,15 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); @@ -438,16 +438,16 @@ static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { return is_alu_reduction_inst(bc, alu) || is_alu_mova_inst(bc, alu) || - (bc->chiprev == CHIPREV_EVERGREEN && + (bc->chip_class == EVERGREEN && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR); } /* alu instructions that can only execute on the trans unit */ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: if (!alu->is_op3) return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || @@ -478,8 +478,8 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 || alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 || alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: if (!alu->is_op3) /* Note that FLT_TO_INT_* instructions are vector-only instructions @@ -525,7 +525,7 @@ static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, { struct r600_bc_alu *alu; unsigned i, chan, trans; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) assignment[i] = NULL; @@ -612,7 +612,7 @@ static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) { int res, num_res = 4; - if (bc->chiprev >= CHIPREV_R700) { + if (bc->chip_class >= R700) { num_res = 2; chan /= 2; } @@ -733,8 +733,8 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct alu_bank_swizzle bs; int bank_swizzle[5]; int i, r = 0, forced = 0; - boolean scalar_only = bc->chiprev == CHIPREV_CAYMAN ? false : true; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + boolean scalar_only = bc->chip_class == CAYMAN ? false : true; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) { if (slots[i] && slots[i]->bank_swizzle_force) { @@ -806,7 +806,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, struct r600_bc_alu *prev[5]; int gpr[5], chan[5]; int i, j, r, src, num_src; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) @@ -834,7 +834,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) continue; - if (bc->chiprev < CHIPREV_CAYMAN) { + if (bc->chip_class < CAYMAN) { if (alu->src[src].sel == gpr[4] && alu->src[src].chan == chan[4]) { alu->src[src].sel = V_SQ_ALU_SRC_PS; @@ -948,7 +948,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], int i, j, r, src, num_src; int num_once_inst = 0; int have_mova = 0, have_rel = 0; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) @@ -1252,7 +1252,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int uint32_t literal[4]; unsigned nliteral; struct r600_bc_alu *slots[5]; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); if (r) return r; @@ -1302,26 +1302,26 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) { - switch (bc->chiprev) { - case CHIPREV_R600: + switch (bc->chip_class) { + case R600: return 8; - case CHIPREV_R700: + case R700: return 16; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: return 64; default: - R600_ERR("Unknown chiprev %d.\n", bc->chiprev); + R600_ERR("Unknown chip class %d.\n", bc->chip_class); return 8; } } static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) { - if (bc->chiprev == CHIPREV_CAYMAN) { + if (bc->chip_class == CAYMAN) { if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC) return TRUE; } else { @@ -1350,7 +1350,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) free(nvtx); return r; } - if (bc->chiprev == CHIPREV_CAYMAN) + if (bc->chip_class == CAYMAN) bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC; else bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; @@ -1438,7 +1438,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); id++; bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | @@ -1453,7 +1453,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); id++; bc->bytecode[id++] = 0; @@ -1560,13 +1560,13 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | - S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chip_class == R600 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - if (bc->chiprev == CHIPREV_R700) + if (bc->chip_class == R700) r700_bc_cf_vtx_build(&bc->bytecode[id], cf); else r600_bc_cf_vtx_build(&bc->bytecode[id], cf); @@ -1673,7 +1673,7 @@ int r600_bc_build(struct r600_bc *bc) return -ENOMEM; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; - if (bc->chiprev >= CHIPREV_EVERGREEN) + if (bc->chip_class >= EVERGREEN) r = eg_bc_cf_build(bc, cf); else r = r600_bc_cf_build(bc, cf); @@ -1691,13 +1691,13 @@ int r600_bc_build(struct r600_bc *bc) if (r) return r; r600_bc_alu_adjust_literals(bc, alu, literal, nliteral); - switch(bc->chiprev) { - case CHIPREV_R600: + switch(bc->chip_class) { + case R600: r = r600_bc_alu_build(bc, alu, addr); break; - case CHIPREV_R700: - case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */ - case CHIPREV_CAYMAN: /* eg alu is same encoding as r700 */ + case R700: + case EVERGREEN: /* eg alu is same encoding as r700 */ + case CAYMAN: /* eg alu is same encoding as r700 */ r = r700_bc_alu_build(bc, alu, addr); break; default: @@ -1726,7 +1726,7 @@ int r600_bc_build(struct r600_bc *bc) } break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: - if (bc->chiprev == CHIPREV_CAYMAN) { + if (bc->chip_class == CAYMAN) { LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { r = r600_bc_vtx_build(bc, vtx, addr); if (r) @@ -1812,17 +1812,17 @@ void r600_bc_dump(struct r600_bc *bc) unsigned nliteral; char chip = '6'; - switch (bc->chiprev) { - case 1: + switch (bc->chip_class) { + case R700: chip = '7'; break; - case 2: + case EVERGREEN: chip = 'E'; break; - case 3: + case CAYMAN: chip = 'C'; break; - case 0: + case R600: default: chip = '6'; break; @@ -1993,7 +1993,7 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); else fprintf(stderr, "SEL_Y:%d) ", 0); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 540f45bbd06..423e94b8a1d 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -172,7 +172,7 @@ struct r600_cf_callstack { struct r600_bc { enum radeon_family family; - int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ + enum chip_class chip_class; int type; struct list_head cf; struct r600_bc_cf *cf_last; diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 184f32c9960..7ae091ea5cd 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -409,14 +409,8 @@ #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_EXPORT_COMBINED 0x0000005B #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS 0x0000005C +#define BC_INST(bc, x) ((bc)->chip_class >= EVERGREEN ? EG_##x : x) -#define CHIPREV_R600 0 -#define CHIPREV_R700 1 -#define CHIPREV_EVERGREEN 2 -#define CHIPREV_CAYMAN 3 - -#define BC_INST(bc, x) ((bc)->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x) - -#define CTX_INST(x) (ctx->bc->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x) +#define CTX_INST(x) (ctx->bc->chip_class >= EVERGREEN ? EG_##x : x) #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 91649e0c058..6f84d8740ee 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -316,7 +316,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].interpolate = d->Declaration.Interpolate; ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; - if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) { /* turn input into interpolate on EG */ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { if (ctx->shader->input[i].interpolate > 0) { @@ -650,13 +650,13 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; - if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx.bc->chip_class >= EVERGREEN) { r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } else { r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } } - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + @@ -710,9 +710,9 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi goto out_err; if ((r = tgsi_split_literal_constant(&ctx))) goto out_err; - if (ctx.bc->chiprev == CHIPREV_CAYMAN) + if (ctx.bc->chip_class == CAYMAN) ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; - else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) + else if (ctx.bc->chip_class >= EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; @@ -885,7 +885,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } /* set export done on last export of each type */ for (i = noutput - 1, output_done = 0; i >= 0; i--) { - if (ctx.bc->chiprev < CHIPREV_CAYMAN) { + if (ctx.bc->chip_class < CAYMAN) { if (i == (noutput - 1)) { output[i].end_of_program = 1; } @@ -902,7 +902,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi goto out_err; } /* add program end */ - if (ctx.bc->chiprev == CHIPREV_CAYMAN) + if (ctx.bc->chip_class == CAYMAN) cm_bc_add_cf_end(ctx.bc); free(ctx.literals); @@ -1122,7 +1122,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - if (ctx->bc->chiprev == CHIPREV_R600) { + if (ctx->bc->chip_class == R600) { alu.src[1].value = *(uint32_t *)&double_pi; alu.src[2].value = *(uint32_t *)&neg_pi; } else { @@ -1229,7 +1229,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.x = COS */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); @@ -1263,7 +1263,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.y = SIN */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); @@ -1378,7 +1378,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) int sel; int i; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1428,7 +1428,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1905,7 +1905,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { int out_chan; /* Add perspective divide */ - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { out_chan = 2; for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1987,7 +1987,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } /* tmp1.z = RCP_e(|tmp1.z|) */ - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2380,7 +2380,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; @@ -2436,7 +2436,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.z = RoughApprox2ToX(tmp);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2498,7 +2498,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.x = floor(log2(|src|)); */ if (inst->Dst[0].Register.WriteMask & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2550,7 +2550,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2601,7 +2601,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2635,7 +2635,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2691,7 +2691,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.z = log2(|src|);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); From 2b5b289a570c699403f115cf3ad094ce92eba2fb Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:18:59 +0200 Subject: [PATCH 029/110] r600g: Store the chip class directly in r600_bc. Instead of deriving it from the family again. Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/r600_asm.c | 48 +++----------------------- src/gallium/drivers/r600/r600_asm.h | 3 +- src/gallium/drivers/r600/r600_shader.c | 4 +-- 3 files changed, 6 insertions(+), 49 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 471fc65e7a5..5fae2b00c8b 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -195,48 +195,10 @@ static struct r600_bc_tex *r600_bc_tex(void) return tex; } -int r600_bc_init(struct r600_bc *bc, enum radeon_family family) +void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class) { LIST_INITHEAD(&bc->cf); - bc->family = family; - switch (bc->family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - bc->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - bc->chip_class = R700; - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_BARTS: - case CHIP_TURKS: - case CHIP_CAICOS: - bc->chip_class = EVERGREEN; - break; - case CHIP_CAYMAN: - bc->chip_class = CAYMAN; - break; - default: - R600_ERR("unknown family %d\n", bc->family); - return -EINVAL; - } - return 0; + bc->chip_class = chip_class; } static int r600_bc_add_cf(struct r600_bc *bc) @@ -1701,7 +1663,7 @@ int r600_bc_build(struct r600_bc *bc) r = r700_bc_alu_build(bc, alu, addr); break; default: - R600_ERR("unknown family %d\n", bc->family); + R600_ERR("unknown chip class %d.\n", bc->chip_class); return -EINVAL; } if (r) @@ -2180,9 +2142,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } memset(&bc, 0, sizeof(bc)); - r = r600_bc_init(&bc, r600_get_family(rctx->radeon)); - if (r) - return r; + r600_bc_init(&bc, rctx->chip_class); for (i = 0; i < ve->count; i++) { if (elements[i].instance_divisor > 1) { diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 423e94b8a1d..cbdaacf7178 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -171,7 +171,6 @@ struct r600_cf_callstack { }; struct r600_bc { - enum radeon_family family; enum chip_class chip_class; int type; struct list_head cf; @@ -193,7 +192,7 @@ struct r600_bc { int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); /* r600_asm.c */ -int r600_bc_init(struct r600_bc *bc, enum radeon_family family); +void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class); void r600_bc_clear(struct r600_bc *bc); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6f84d8740ee..de49d212a58 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -609,9 +609,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi ctx.bc = &shader->bc; ctx.shader = shader; - r = r600_bc_init(ctx.bc, rctx->family); - if (r) - return r; + r600_bc_init(ctx.bc, rctx->chip_class); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); From 2e53725bbc6385c76b26f069df0865c22fd365c0 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:19:00 +0200 Subject: [PATCH 030/110] r600g: Check for Evergreen chip class instead of Cedar family in r600_context_flush(). Signed-off-by: Henri Verbeet --- src/gallium/winsys/r600/drm/r600_hw_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 633cd35f7a7..a2f13ff0863 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1504,7 +1504,7 @@ void r600_context_flush(struct r600_context *ctx) /* suspend queries */ r600_context_queries_suspend(ctx); - if (ctx->radeon->family >= CHIP_CEDAR) + if (ctx->radeon->chip_class >= EVERGREEN) evergreen_context_flush_dest_caches(ctx); else r600_context_flush_dest_caches(ctx); From 7e591111bf783d94ee6034287cde2f4c9214e810 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sat, 9 Jul 2011 17:19:00 +0200 Subject: [PATCH 031/110] r600g: Get rid of some superfluous braces. Signed-off-by: Henri Verbeet --- src/gallium/drivers/r600/evergreen_state.c | 14 +++++++------- src/gallium/drivers/r600/r600_state.c | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 97f10ce77b9..fbf25feaf20 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -510,11 +510,11 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) if (R600_BIG_ENDIAN) { switch(colorformat) { case V_028C70_COLOR_4_4: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 8-bit buffers. */ case V_028C70_COLOR_8: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 16-bit buffers. */ case V_028C70_COLOR_5_6_5: @@ -522,7 +522,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_4_4_4_4: case V_028C70_COLOR_16: case V_028C70_COLOR_8_8: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; /* 32-bit buffers. */ case V_028C70_COLOR_8_8_8_8: @@ -532,23 +532,23 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_FLOAT: case V_028C70_COLOR_16_16_FLOAT: case V_028C70_COLOR_16_16: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 64-bit buffers. */ case V_028C70_COLOR_16_16_16_16: case V_028C70_COLOR_16_16_16_16_FLOAT: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; case V_028C70_COLOR_32_32_FLOAT: case V_028C70_COLOR_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 96-bit buffers. */ case V_028C70_COLOR_32_32_32_FLOAT: /* 128-bit buffers. */ case V_028C70_COLOR_32_32_32_32_FLOAT: case V_028C70_COLOR_32_32_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; default: return ENDIAN_NONE; /* Unsupported. */ } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 8a684e63c01..203b39f855f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -501,11 +501,11 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) if (R600_BIG_ENDIAN) { switch(colorformat) { case V_0280A0_COLOR_4_4: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 8-bit buffers. */ case V_0280A0_COLOR_8: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 16-bit buffers. */ case V_0280A0_COLOR_5_6_5: @@ -513,7 +513,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_4_4_4_4: case V_0280A0_COLOR_16: case V_0280A0_COLOR_8_8: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; /* 32-bit buffers. */ case V_0280A0_COLOR_8_8_8_8: @@ -523,22 +523,22 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_32_FLOAT: case V_0280A0_COLOR_16_16_FLOAT: case V_0280A0_COLOR_16_16: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 64-bit buffers. */ case V_0280A0_COLOR_16_16_16_16: case V_0280A0_COLOR_16_16_16_16_FLOAT: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; case V_0280A0_COLOR_32_32_FLOAT: case V_0280A0_COLOR_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 128-bit buffers. */ case V_0280A0_COLOR_32_32_32_FLOAT: case V_0280A0_COLOR_32_32_32_32_FLOAT: case V_0280A0_COLOR_32_32_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; default: return ENDIAN_NONE; /* Unsupported. */ } From 440224ab73ba19a96629c34e21fe976d1395e483 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 9 Jul 2011 02:46:03 -0700 Subject: [PATCH 032/110] intel: Recognize all depth formats in get_teximage_readbuffer. The existing code was missing GL_DEPTH_COMPONENT32, resulting in it wrongly returning the color buffer instead of the depth buffer. Fixes an issue in PlaneShift 0.5.7 when casting spells. The game calls CopyTexSubImage2D on buffers with a GL_DEPTH_COMPONENT32 internal format, which (prior to this patch) resulted in an attempt to copy ARGB8888 to X8_Z24. Instead of adding the missing enumeration directly, convert the code to use _mesa_is_depth_format() and _mesa_is_depthstencil_format() as these should catch any newly added depth formats in the future. NOTE: This is a candidate for the 7.10 and 7.11 branches. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/intel/intel_tex_copy.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index eda07a43dee..1a3643da593 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -55,15 +55,11 @@ get_teximage_readbuffer(struct intel_context *intel, GLenum internalFormat) DBG("%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(internalFormat)); - switch (internalFormat) { - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH24_STENCIL8_EXT: - case GL_DEPTH_STENCIL_EXT: + if (_mesa_is_depth_format(internalFormat) || + _mesa_is_depthstencil_format(internalFormat)) return intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - default: - return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); - } + + return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); } From 95f9e118fe7b02ab5d28550dabd8751e5fb15e3c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sun, 10 Jul 2011 01:27:20 +0800 Subject: [PATCH 033/110] st/egl: fix linking errors Add symbols referenced by src/glx/dri2.c. --- .../state_trackers/egl/x11/x11_screen.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.c b/src/gallium/state_trackers/egl/x11/x11_screen.c index f1cc4400ba5..6155b4d03c0 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.c +++ b/src/gallium/state_trackers/egl/x11/x11_screen.c @@ -452,6 +452,12 @@ dri2InvalidateBuffers(Display *dpy, XID drawable) extern unsigned dri2GetSwapEventType(Display *dpy, XID drawable); +extern void * +dri2GetGlxDrawableFromXDrawableId(Display *dpy, XID id); + +extern void * +GetGLXDrawable(Display *dpy, XID drawable); + /** * This is also called from src/glx/dri2.c. */ @@ -460,4 +466,16 @@ unsigned dri2GetSwapEventType(Display *dpy, XID drawable) return 0; } +void * +dri2GetGlxDrawableFromXDrawableId(Display *dpy, XID id) +{ + return NULL; +} + +void * +GetGLXDrawable(Display *dpy, XID drawable) +{ + return NULL; +} + #endif /* GLX_DIRECT_RENDERING */ From d644a50dc328e54d513e4304378bb8c34148f7cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 Jul 2011 17:40:38 +0200 Subject: [PATCH 034/110] st/dri: remove unused variables --- src/gallium/state_trackers/dri/common/dri_context.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index 08bbdf96e34..e6612b1911d 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -143,8 +143,6 @@ dri_unbind_context(__DRIcontext * cPriv) /* dri_util.c ensures cPriv is not null */ struct dri_screen *screen = dri_screen(cPriv->driScreenPriv); struct dri_context *ctx = dri_context(cPriv); - struct dri_drawable *draw = dri_drawable(ctx->dPriv); - struct dri_drawable *read = dri_drawable(ctx->rPriv); struct st_api *stapi = screen->st_api; if (--ctx->bind_count == 0) { From f0a7e28e29b5005c20ac02a7eec6511f6d7fd1c4 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Sun, 10 Jul 2011 13:19:38 -0400 Subject: [PATCH 035/110] r600g: LIT: clamp negative src.y to 0 Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39083 Signed-off-by: Vadim Girlin Signed-off-by: Alex Deucher --- src/gallium/drivers/r600/r600_shader.c | 29 +++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index de49d212a58..3e21ad1fdc6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1370,6 +1370,22 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; + /* tmp.x = max(src.y, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ + alu.src[1].chan = 1; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + if (inst->Dst[0].Register.WriteMask & (1 << 2)) { int chan; @@ -1378,11 +1394,13 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - /* dst.z = log(src.y) */ + /* tmp.z = log(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); - tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; if (i == 2) { alu.dst.write = 1; alu.last = 1; @@ -1394,10 +1412,11 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) return r; } } else { - /* tmp.z = log(src.y) */ + /* tmp.z = log(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 2; alu.dst.write = 1; From 1165280cbd37dee1e499358633478ab869de21df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 1 Jun 2011 15:48:51 +0200 Subject: [PATCH 036/110] mesa: initial ARB_depth_buffer_float support Using GL_NONE as DataType of Z32_FLOAT_X24S8, not sure what I should put there. The spec says the type is n/a. Reviewed-by: Kenneth Graunke --- src/mesa/main/fbobject.c | 19 +++++++++++++++++++ src/mesa/main/formats.c | 29 +++++++++++++++++++++++++++++ src/mesa/main/formats.h | 3 +++ src/mesa/main/image.c | 18 ++++++++++++++++-- src/mesa/main/readpix.c | 29 +++++++++++++++++++++++++---- src/mesa/main/renderbuffer.c | 3 +++ src/mesa/main/texfetch.c | 14 ++++++++++++++ src/mesa/main/texformat.c | 13 +++++++++++++ src/mesa/main/texstore.c | 3 +++ 9 files changed, 125 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8cc3fd49a34..d094dd35a69 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1131,6 +1131,16 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat) return GL_DEPTH_STENCIL_EXT; else return 0; + case GL_DEPTH_COMPONENT32F: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_COMPONENT; + else + return 0; + case GL_DEPTH32F_STENCIL8: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_STENCIL; + else + return 0; case GL_RED: case GL_R8: case GL_R16: @@ -2266,6 +2276,15 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, /* special cases */ *params = GL_INDEX; } + else if (format == MESA_FORMAT_Z32_FLOAT_X24S8) { + /* depends on the attachment parameter */ + if (attachment == GL_STENCIL_ATTACHMENT) { + *params = GL_INDEX; + } + else { + *params = GL_FLOAT; + } + } else { *params = _mesa_get_format_datatype(format); } diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index e88ba43971b..f58b1975672 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -1091,6 +1091,25 @@ static struct gl_format_info format_info[MESA_FORMAT_COUNT] = 0, 0, 0, 0, 0, 1, 1, 4 }, + /* ARB_depth_buffer_float */ + { + MESA_FORMAT_Z32_FLOAT, /* Name */ + "MESA_FORMAT_Z32_FLOAT", /* StrName */ + GL_DEPTH_COMPONENT, /* BaseFormat */ + GL_FLOAT, /* DataType */ + 0, 0, 0, 0, /* Red/Green/Blue/AlphaBits */ + 0, 0, 0, 32, 0, /* Lum/Int/Index/Depth/StencilBits */ + 1, 1, 4 /* BlockWidth/Height,Bytes */ + }, + { + MESA_FORMAT_Z32_FLOAT_X24S8, /* Name */ + "MESA_FORMAT_Z32_FLOAT_X24S8", /* StrName */ + GL_DEPTH_STENCIL, /* BaseFormat */ + GL_NONE /* XXX */, /* DataType */ + 0, 0, 0, 0, /* Red/Green/Blue/AlphaBits */ + 0, 0, 0, 32, 8, /* Lum/Int/Index/Depth/StencilBits */ + 1, 1, 8 /* BlockWidth/Height,Bytes */ + }, }; @@ -1654,6 +1673,16 @@ _mesa_format_to_type_and_comps(gl_format format, *comps = 1; return; + case MESA_FORMAT_Z32_FLOAT: + *datatype = GL_FLOAT; + *comps = 1; + return; + + case MESA_FORMAT_Z32_FLOAT_X24S8: + *datatype = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + *comps = 1; + return; + case MESA_FORMAT_DUDV8: *datatype = GL_BYTE; *comps = 2; diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index 0640bbc4af1..5b8c01781a6 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -209,6 +209,9 @@ typedef enum MESA_FORMAT_RGB9_E5_FLOAT, MESA_FORMAT_R11_G11_B10_FLOAT, + MESA_FORMAT_Z32_FLOAT, + MESA_FORMAT_Z32_FLOAT_X24S8, + MESA_FORMAT_COUNT } gl_format; diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index 6d7bc735887..37127dcb7a2 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -84,6 +84,7 @@ _mesa_type_is_packed(GLenum type) case GL_UNSIGNED_INT_24_8_EXT: case GL_UNSIGNED_INT_5_9_9_9_REV: case GL_UNSIGNED_INT_10F_11F_11F_REV: + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: return GL_TRUE; } @@ -228,6 +229,8 @@ _mesa_sizeof_packed_type( GLenum type ) return sizeof(GLuint); case GL_UNSIGNED_INT_10F_11F_11F_REV: return sizeof(GLuint); + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + return 8; default: return -1; } @@ -379,6 +382,11 @@ _mesa_bytes_per_pixel( GLenum format, GLenum type ) return sizeof(GLuint); else return -1; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + if (format == GL_DEPTH_STENCIL) + return 8; + else + return -1; default: return -1; } @@ -531,8 +539,10 @@ _mesa_is_legal_format_and_type(const struct gl_context *ctx, else return GL_FALSE; case GL_DEPTH_STENCIL_EXT: - if (ctx->Extensions.EXT_packed_depth_stencil - && type == GL_UNSIGNED_INT_24_8_EXT) + if ((ctx->Extensions.EXT_packed_depth_stencil && + type == GL_UNSIGNED_INT_24_8_EXT) || + (ctx->Extensions.ARB_depth_buffer_float && + type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)) return GL_TRUE; else return GL_FALSE; @@ -884,6 +894,7 @@ _mesa_is_depth_format(GLenum format) case GL_DEPTH_COMPONENT16: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: + case GL_DEPTH_COMPONENT32F: return GL_TRUE; default: return GL_FALSE; @@ -931,6 +942,7 @@ _mesa_is_depthstencil_format(GLenum format) switch (format) { case GL_DEPTH24_STENCIL8_EXT: case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH32F_STENCIL8: return GL_TRUE; default: return GL_FALSE; @@ -956,6 +968,8 @@ _mesa_is_depth_or_stencil_format(GLenum format) case GL_STENCIL_INDEX16_EXT: case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: + case GL_DEPTH_COMPONENT32F: + case GL_DEPTH32F_STENCIL8: return GL_TRUE; default: return GL_FALSE; diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 0331a8ca2fe..84c5b22286a 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -61,6 +61,14 @@ _mesa_error_check_format_type(struct gl_context *ctx, GLenum format, return GL_TRUE; } + if (ctx->Extensions.ARB_depth_buffer_float + && type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV + && format != GL_DEPTH_STENCIL_EXT) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "gl%sPixels(format is not GL_DEPTH_STENCIL_EXT)", readDraw); + return GL_TRUE; + } + /* basic combinations test */ if (!_mesa_is_legal_format_and_type(ctx, format, type)) { _mesa_error(ctx, GL_INVALID_ENUM, @@ -142,10 +150,23 @@ _mesa_error_check_format_type(struct gl_context *ctx, GLenum format, } break; case GL_DEPTH_STENCIL_EXT: - if (!ctx->Extensions.EXT_packed_depth_stencil || - type != GL_UNSIGNED_INT_24_8_EXT) { - _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); - return GL_TRUE; + /* Check validity of the type first. */ + switch (type) { + case GL_UNSIGNED_INT_24_8_EXT: + if (!ctx->Extensions.EXT_packed_depth_stencil) { + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; + } + break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + if (!ctx->Extensions.ARB_depth_buffer_float) { + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; + } + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; } if ((drawing && !_mesa_dest_buffer_exists(ctx, format)) || (reading && !_mesa_source_buffer_exists(ctx, format))) { diff --git a/src/mesa/main/renderbuffer.c b/src/mesa/main/renderbuffer.c index c36175c60e7..f5b20020d23 100644 --- a/src/mesa/main/renderbuffer.c +++ b/src/mesa/main/renderbuffer.c @@ -66,6 +66,9 @@ get_datatype_bytes(struct gl_renderbuffer *rb) int component_size; switch (rb->DataType) { + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + component_size = 8; + break; case GL_FLOAT: case GL_UNSIGNED_INT: case GL_UNSIGNED_INT_24_8_EXT: diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c index 6716ce1b071..4b85bc32a92 100644 --- a/src/mesa/main/texfetch.c +++ b/src/mesa/main/texfetch.c @@ -913,6 +913,20 @@ texfetch_funcs[MESA_FORMAT_COUNT] = fetch_texel_2d_r11_g11_b10f, fetch_texel_3d_r11_g11_b10f, store_texel_r11_g11_b10f + }, + { + MESA_FORMAT_Z32_FLOAT, + NULL, /* XXX */ + NULL, + NULL, + NULL + }, + { + MESA_FORMAT_Z32_FLOAT_X24S8, + NULL, /* XXX */ + NULL, + NULL, + NULL } }; diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index 8cbb021d8b0..c919a74e047 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -416,6 +416,19 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint internalFormat, } } + if (ctx->Extensions.ARB_depth_buffer_float) { + switch (internalFormat) { + case GL_DEPTH_COMPONENT32F: + ASSERT(ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT]); + return MESA_FORMAT_Z32_FLOAT; + case GL_DEPTH32F_STENCIL8: + ASSERT(ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT_X24S8]); + return MESA_FORMAT_Z32_FLOAT_X24S8; + default: + ; /* fallthrough */ + } + } + if (ctx->Extensions.ATI_envmap_bumpmap) { switch (internalFormat) { case GL_DUDV_ATI: diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index e527981ff47..3249e1444e6 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -4419,6 +4419,9 @@ texstore_funcs[MESA_FORMAT_COUNT] = { MESA_FORMAT_RGB9_E5_FLOAT, _mesa_texstore_rgb9_e5 }, { MESA_FORMAT_R11_G11_B10_FLOAT, _mesa_texstore_r11_g11_b10f }, + + { MESA_FORMAT_Z32_FLOAT, NULL /* XXX */ }, + { MESA_FORMAT_Z32_FLOAT_X24S8, /* XXX */ }, }; From 4843c7b24af7408329d33ab16bb946b17244a5f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 20 Jun 2011 03:07:16 +0200 Subject: [PATCH 037/110] mesa: implement texfetch functions for depth_buffer_float Reviewed-by: Kenneth Graunke --- src/mesa/main/texfetch.c | 16 ++++++++-------- src/mesa/main/texfetch_tmp.h | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c index 4b85bc32a92..72283eb68af 100644 --- a/src/mesa/main/texfetch.c +++ b/src/mesa/main/texfetch.c @@ -916,17 +916,17 @@ texfetch_funcs[MESA_FORMAT_COUNT] = }, { MESA_FORMAT_Z32_FLOAT, - NULL, /* XXX */ - NULL, - NULL, - NULL + fetch_texel_1d_f_r_f32, /* Reuse the R32F functions. */ + fetch_texel_2d_f_r_f32, + fetch_texel_3d_f_r_f32, + store_texel_r_f32 }, { MESA_FORMAT_Z32_FLOAT_X24S8, - NULL, /* XXX */ - NULL, - NULL, - NULL + fetch_texel_1d_z32f_x24s8, + fetch_texel_2d_z32f_x24s8, + fetch_texel_3d_z32f_x24s8, + store_texel_z32f_x24s8 } }; diff --git a/src/mesa/main/texfetch_tmp.h b/src/mesa/main/texfetch_tmp.h index e6fd81d4d57..3b1eedf39bf 100644 --- a/src/mesa/main/texfetch_tmp.h +++ b/src/mesa/main/texfetch_tmp.h @@ -2374,6 +2374,29 @@ static void store_texel_r11_g11_b10f(struct gl_texture_image *texImage, #endif +/* MESA_FORMAT_Z32_FLOAT_X24S8 ***********************************************/ + +static void FETCH(z32f_x24s8)(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel) +{ + const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2); + texel[RCOMP] = src[0]; + texel[GCOMP] = 0.0F; + texel[BCOMP] = 0.0F; + texel[ACOMP] = 1.0F; +} + +#if DIM == 3 +static void store_texel_z32f_x24s8(struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, const void *texel) +{ + const GLfloat *src = (const GLfloat *) texel; + GLfloat *dst = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2); + dst[0] = src[0]; +} +#endif + + #undef TEXEL_ADDR #undef DIM #undef FETCH From ec6fbbe36ee198d00db6a1ae297970531186ae3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 05:06:36 +0200 Subject: [PATCH 038/110] mesa: implement stencil unpacking for GL_FLOAT_32_UNSIGNED_INT_24_8_REV Reviewed-by: Kenneth Graunke --- src/mesa/main/pack.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index a232a51c355..c284c7d8d62 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -1971,7 +1971,8 @@ extract_uint_indexes(GLuint n, GLuint indexes[], srcType == GL_INT || srcType == GL_UNSIGNED_INT_24_8_EXT || srcType == GL_HALF_FLOAT_ARB || - srcType == GL_FLOAT); + srcType == GL_FLOAT || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); switch (srcType) { case GL_BITMAP: @@ -2142,6 +2143,23 @@ extract_uint_indexes(GLuint n, GLuint indexes[], } } break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint i; + const GLuint *s = (const GLuint *) src; + if (unpack->SwapBytes) { + for (i = 0; i < n; i++) { + GLuint value = s[i*2+1]; + SWAP4BYTE(value); + indexes[i] = value & 0xff; /* lower 8 bits */ + } + } + else { + for (i = 0; i < n; i++) + indexes[i] = s[i*2+1] & 0xff; /* lower 8 bits */ + } + } + break; default: _mesa_problem(NULL, "bad srcType in extract_uint_indexes"); @@ -4412,11 +4430,13 @@ _mesa_unpack_stencil_span( struct gl_context *ctx, GLuint n, srcType == GL_INT || srcType == GL_UNSIGNED_INT_24_8_EXT || srcType == GL_HALF_FLOAT_ARB || - srcType == GL_FLOAT); + srcType == GL_FLOAT || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); ASSERT(dstType == GL_UNSIGNED_BYTE || dstType == GL_UNSIGNED_SHORT || - dstType == GL_UNSIGNED_INT); + dstType == GL_UNSIGNED_INT || + dstType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); /* only shift and offset apply to stencil */ transferOps &= IMAGE_SHIFT_OFFSET_BIT; @@ -4488,6 +4508,15 @@ _mesa_unpack_stencil_span( struct gl_context *ctx, GLuint n, case GL_UNSIGNED_INT: memcpy(dest, indexes, n * sizeof(GLuint)); break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint *dst = (GLuint *) dest; + GLuint i; + for (i = 0; i < n; i++) { + dst[i*2+1] = indexes[i] & 0xff; /* lower 8 bits */ + } + } + break; default: _mesa_problem(ctx, "bad dstType in _mesa_unpack_stencil_span"); } From bfb63b7d62ccd9618a110f9f5297f87574384058 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 05:09:24 +0200 Subject: [PATCH 039/110] mesa: implement depth unpacking for GL_FLOAT_32_UNSIGNED_INT_24_8_REV Reviewed-by: Kenneth Graunke --- src/mesa/main/pack.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index c284c7d8d62..d42ae7bf0f4 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -4827,6 +4827,20 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, } } break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint i; + const GLfloat *src = (const GLfloat *)source; + for (i = 0; i < n; i++) { + GLfloat value = src[i * 2]; + if (srcPacking->SwapBytes) { + SWAP4BYTE(value); + } + depthValues[i] = value; + } + needClamp = GL_TRUE; + } + break; case GL_FLOAT: DEPTH_VALUES(GLfloat, 1*); needClamp = GL_TRUE; @@ -4903,9 +4917,18 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, zValues[i] = (GLushort) (depthValues[i] * (GLfloat) depthMax); } } + else if (dstType == GL_FLOAT) { + /* Nothing to do. depthValues is pointing to dest. */ + } + else if (dstType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) { + GLfloat *zValues = (GLfloat*) dest; + GLuint i; + for (i = 0; i < n; i++) { + zValues[i*2] = depthValues[i]; + } + } else { - ASSERT(dstType == GL_FLOAT); - /*ASSERT(depthMax == 1.0F);*/ + ASSERT(0); } free(depthTemp); From bc878c7f8bfdabb40a4f784984c13b94656569ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 13:36:27 +0200 Subject: [PATCH 040/110] mesa: implement texstore for DEPTH_COMPONENT32F Reviewed-by: Kenneth Graunke --- src/mesa/main/texstore.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 3249e1444e6..777773cc14b 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -1002,15 +1002,17 @@ memcpy_texture(struct gl_context *ctx, /** - * Store a 32-bit integer depth component texture image. + * Store a 32-bit integer or float depth component texture image. */ static GLboolean _mesa_texstore_z32(TEXSTORE_PARAMS) { const GLuint depthScale = 0xffffffff; const GLuint texelBytes = _mesa_get_format_bytes(dstFormat); + const GLenum dstType = _mesa_get_format_datatype(dstFormat); (void) dims; - ASSERT(dstFormat == MESA_FORMAT_Z32); + ASSERT(dstFormat == MESA_FORMAT_Z32 || + dstFormat == MESA_FORMAT_Z32_FLOAT); ASSERT(texelBytes == sizeof(GLuint)); if (ctx->Pixel.DepthScale == 1.0f && @@ -1018,7 +1020,7 @@ _mesa_texstore_z32(TEXSTORE_PARAMS) !srcPacking->SwapBytes && baseInternalFormat == GL_DEPTH_COMPONENT && srcFormat == GL_DEPTH_COMPONENT && - srcType == GL_UNSIGNED_INT) { + srcType == dstType) { /* simple memcpy path */ memcpy_texture(ctx, dims, dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset, @@ -1039,7 +1041,7 @@ _mesa_texstore_z32(TEXSTORE_PARAMS) const GLvoid *src = _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, img, row, 0); _mesa_unpack_depth_span(ctx, srcWidth, - GL_UNSIGNED_INT, (GLuint *) dstRow, + dstType, dstRow, depthScale, srcType, src, srcPacking); dstRow += dstRowStride; } @@ -4420,7 +4422,7 @@ texstore_funcs[MESA_FORMAT_COUNT] = { MESA_FORMAT_RGB9_E5_FLOAT, _mesa_texstore_rgb9_e5 }, { MESA_FORMAT_R11_G11_B10_FLOAT, _mesa_texstore_r11_g11_b10f }, - { MESA_FORMAT_Z32_FLOAT, NULL /* XXX */ }, + { MESA_FORMAT_Z32_FLOAT, _mesa_texstore_z32 }, { MESA_FORMAT_Z32_FLOAT_X24S8, /* XXX */ }, }; From b2f087cd87ab9b1651d221b5c7d7e543a4585d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 05:17:51 +0200 Subject: [PATCH 041/110] mesa: implement texstore for DEPTH32F_STENCIL8 Reviewed-by: Kenneth Graunke --- src/mesa/main/texstore.c | 68 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 777773cc14b..6e1e63bdfb0 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -4287,6 +4287,72 @@ _mesa_texstore_r11_g11_b10f(TEXSTORE_PARAMS) } +static GLboolean +_mesa_texstore_z32f_x24s8(TEXSTORE_PARAMS) +{ + ASSERT(dstFormat == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(srcFormat == GL_DEPTH_STENCIL || + srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX); + ASSERT(srcFormat != GL_DEPTH_STENCIL || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + + if (srcFormat == GL_DEPTH_STENCIL && + ctx->Pixel.DepthScale == 1.0f && + ctx->Pixel.DepthBias == 0.0f && + !srcPacking->SwapBytes) { + /* simple path */ + memcpy_texture(ctx, dims, + dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset, + dstRowStride, + dstImageOffsets, + srcWidth, srcHeight, srcDepth, srcFormat, srcType, + srcAddr, srcPacking); + } + else if (srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX) { + GLint img, row; + const GLint srcRowStride + = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType) + / sizeof(uint64_t); + + /* In case we only upload depth we need to preserve the stencil */ + for (img = 0; img < srcDepth; img++) { + uint64_t *dstRow = (uint64_t *) dstAddr + + dstImageOffsets[dstZoffset + img] + + dstYoffset * dstRowStride / sizeof(uint64_t) + + dstXoffset; + const uint64_t *src + = (const uint64_t *) _mesa_image_address(dims, srcPacking, srcAddr, + srcWidth, srcHeight, + srcFormat, srcType, + img, 0, 0); + for (row = 0; row < srcHeight; row++) { + /* The unpack functions with: + * dstType = GL_FLOAT_32_UNSIGNED_INT_24_8_REV + * only write their own dword, so the other dword (stencil + * or depth) is preserved. */ + if (srcFormat != GL_STENCIL_INDEX) + _mesa_unpack_depth_span(ctx, srcWidth, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV, /* dst type */ + dstRow, /* dst addr */ + 1.0f, srcType, src, srcPacking); + + if (srcFormat != GL_DEPTH_COMPONENT) + _mesa_unpack_stencil_span(ctx, srcWidth, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV, /* dst type */ + dstRow, /* dst addr */ + srcType, src, srcPacking, + ctx->_ImageTransferState); + + src += srcRowStride; + dstRow += dstRowStride / sizeof(uint64_t); + } + } + } + return GL_TRUE; +} + /** * Table mapping MESA_FORMAT_* to _mesa_texstore_*() @@ -4423,7 +4489,7 @@ texstore_funcs[MESA_FORMAT_COUNT] = { MESA_FORMAT_R11_G11_B10_FLOAT, _mesa_texstore_r11_g11_b10f }, { MESA_FORMAT_Z32_FLOAT, _mesa_texstore_z32 }, - { MESA_FORMAT_Z32_FLOAT_X24S8, /* XXX */ }, + { MESA_FORMAT_Z32_FLOAT_X24S8, _mesa_texstore_z32f_x24s8 }, }; From bde6a044588401ebbd14881cd5621095c221f0a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 14:18:25 +0200 Subject: [PATCH 042/110] mesa: implement generatemipmap for GL_FLOAT_32_UNSIGNED_INT_24_8_REV Reviewed-by: Kenneth Graunke --- src/mesa/main/mipmap.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index f2724dbca7e..8a811cb7225 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -706,6 +706,17 @@ do_row(GLenum datatype, GLuint comps, GLint srcWidth, } } + else if (datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV && comps == 1) { + GLuint i, j, k; + const GLfloat *rowA = (const GLfloat *) srcRowA; + const GLfloat *rowB = (const GLfloat *) srcRowB; + GLfloat *dst = (GLfloat *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i*2] = (rowA[j*2] + rowA[k*2] + rowB[j*2] + rowB[k*2]) * 0.25F; + } + } + else { _mesa_problem(NULL, "bad format in do_row()"); } @@ -1341,6 +1352,15 @@ do_row_3D(GLenum datatype, GLuint comps, GLint srcWidth, } } + else if (datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV && comps == 1) { + DECLARE_ROW_POINTERS(GLfloat, 2); + + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + } + } + else { _mesa_problem(NULL, "bad format in do_row()"); } From ba15e8260ef6697fcd8c1f6ab098469db6fe78ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 1 Jul 2011 02:04:34 +0200 Subject: [PATCH 043/110] mesa: implement depth/stencil renderbuffer wrapper accessors for Z32F_X24S8 Reviewed-by: Kenneth Graunke --- src/mesa/main/depthstencil.c | 322 ++++++++++++++++++++++++++++++++--- src/mesa/main/depthstencil.h | 5 + src/mesa/main/framebuffer.c | 10 +- 3 files changed, 313 insertions(+), 24 deletions(-) diff --git a/src/mesa/main/depthstencil.c b/src/mesa/main/depthstencil.c index ab62c97fe5a..4d0600050ff 100644 --- a/src/mesa/main/depthstencil.c +++ b/src/mesa/main/depthstencil.c @@ -393,6 +393,217 @@ _mesa_new_z24_renderbuffer_wrapper(struct gl_context *ctx, } +static void +get_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, void *values) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLfloat *dst = (GLfloat *) values; + const GLfloat *src = (const GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + GLuint i; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (!src) { + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + src = temp; + } + for (i = 0; i < count; i++) { + dst[i] = src[i*2]; + } +} + +static void +get_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + const GLint x[], const GLint y[], void *values) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLfloat *dst = (GLfloat *) values; + GLuint i; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(count <= MAX_WIDTH); + /* don't bother trying direct access */ + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + dst[i] = temp[i*2]; + } +} + +static void +put_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, const void *values, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + const GLfloat *src = (const GLfloat *) values; + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dst) { + /* direct access */ + GLuint i; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2] = src[i]; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = src[i]; + } + } + dsrb->PutRow(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_mono_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, const void *value, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dst) { + /* direct access */ + GLuint i; + const GLfloat val = *(GLfloat*)value; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2] = val; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + const GLfloat val = *(GLfloat *)value; + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = val; + } + } + dsrb->PutRow(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + const GLint x[], const GLint y[], + const void *values, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + const GLfloat *src = (const GLfloat *) values; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dsrb->GetPointer(ctx, dsrb, 0, 0)) { + /* direct access */ + GLuint i; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); + *dst = src[i]; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = src[i]; + } + } + dsrb->PutValues(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_mono_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, + GLuint count, const GLint x[], const GLint y[], + const void *value, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + const GLfloat val = *(GLfloat *)value; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + /* get, modify, put */ + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = val; + } + } + dsrb->PutValues(ctx, dsrb, count, x, y, temp, mask); +} + + +/** + * Wrap the given GL_DEPTH_STENCIL renderbuffer so that it acts like + * a depth renderbuffer. + * \return new depth renderbuffer + */ +struct gl_renderbuffer * +_mesa_new_z32f_renderbuffer_wrapper(struct gl_context *ctx, + struct gl_renderbuffer *dsrb) +{ + struct gl_renderbuffer *z32frb; + + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + + z32frb = ctx->Driver.NewRenderbuffer(ctx, 0); + if (!z32frb) + return NULL; + + /* NOTE: need to do manual refcounting here */ + z32frb->Wrapped = dsrb; + dsrb->RefCount++; + + z32frb->Name = dsrb->Name; + z32frb->RefCount = 0; + z32frb->Width = dsrb->Width; + z32frb->Height = dsrb->Height; + z32frb->RowStride = dsrb->RowStride; + z32frb->InternalFormat = GL_DEPTH_COMPONENT32F; + z32frb->Format = MESA_FORMAT_Z32_FLOAT; + z32frb->_BaseFormat = GL_DEPTH_COMPONENT; + z32frb->DataType = GL_FLOAT; + z32frb->Data = NULL; + z32frb->Delete = delete_wrapper; + z32frb->AllocStorage = alloc_wrapper_storage; + z32frb->GetPointer = nop_get_pointer; + z32frb->GetRow = get_row_z32f; + z32frb->GetValues = get_values_z32f; + z32frb->PutRow = put_row_z32f; + z32frb->PutRowRGB = NULL; + z32frb->PutMonoRow = put_mono_row_z32f; + z32frb->PutValues = put_values_z32f; + z32frb->PutMonoValues = put_mono_values_z32f; + + return z32frb; +} + + /*====================================================================== * Stencil wrapper around depth/stencil renderbuffer */ @@ -402,16 +613,22 @@ get_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, GLint x, GLint y, void *values) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; GLubyte *dst = (GLubyte *) values; const GLuint *src = (const GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (!src) { dsrb->GetRow(ctx, dsrb, count, x, y, temp); src = temp; } - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + dst[i] = src[i*2+1] & 0xff; + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { dst[i] = src[i] & 0xff; } @@ -429,14 +646,20 @@ get_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count const GLint x[], const GLint y[], void *values) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; GLubyte *dst = (GLubyte *) values; ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); ASSERT(count <= MAX_WIDTH); /* don't bother trying direct access */ dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + dst[i] = temp[i*2+1] & 0xff; + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { dst[i] = temp[i] & 0xff; } @@ -457,11 +680,19 @@ put_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, const GLubyte *src = (const GLubyte *) values; GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dst) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { dst[i] = (dst[i] & 0xffffff00) | src[i]; @@ -479,9 +710,16 @@ put_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetRow(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | src[i]; @@ -508,11 +746,19 @@ put_mono_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint cou const GLubyte val = *((GLubyte *) value); GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dst) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { dst[i] = (dst[i] & 0xffffff00) | val; @@ -530,9 +776,16 @@ put_mono_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint cou } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetRow(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | val; @@ -559,11 +812,20 @@ put_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count struct gl_renderbuffer *dsrb = s8rb->Wrapped; const GLubyte *src = (const GLubyte *) values; ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dsrb->GetPointer(ctx, dsrb, 0, 0)) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); + dst[1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); @@ -583,9 +845,16 @@ put_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | src[i]; @@ -610,11 +879,18 @@ put_mono_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint const void *value, const GLubyte *mask) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; const GLubyte val = *((GLubyte *) value); /* get, modify, put */ dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | val; @@ -644,8 +920,10 @@ _mesa_new_s8_renderbuffer_wrapper(struct gl_context *ctx, struct gl_renderbuffer struct gl_renderbuffer *s8rb; ASSERT(dsrb->Format == MESA_FORMAT_Z24_S8 || - dsrb->Format == MESA_FORMAT_S8_Z24); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + dsrb->Format == MESA_FORMAT_S8_Z24 || + dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); s8rb = ctx->Driver.NewRenderbuffer(ctx, 0); if (!s8rb) diff --git a/src/mesa/main/depthstencil.h b/src/mesa/main/depthstencil.h index ef63c5d7a31..b47a2e482c2 100644 --- a/src/mesa/main/depthstencil.h +++ b/src/mesa/main/depthstencil.h @@ -33,6 +33,11 @@ _mesa_new_z24_renderbuffer_wrapper(struct gl_context *ctx, struct gl_renderbuffer *dsrb); +extern struct gl_renderbuffer * +_mesa_new_z32f_renderbuffer_wrapper(struct gl_context *ctx, + struct gl_renderbuffer *dsrb); + + extern struct gl_renderbuffer * _mesa_new_s8_renderbuffer_wrapper(struct gl_context *ctx, struct gl_renderbuffer *dsrb); diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 66c9bd91096..6e2ce74212e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -631,8 +631,14 @@ _mesa_update_depth_buffer(struct gl_context *ctx, || fb->_DepthBuffer->Wrapped != depthRb || _mesa_get_format_base_format(fb->_DepthBuffer->Format) != GL_DEPTH_COMPONENT) { /* need to update wrapper */ - struct gl_renderbuffer *wrapper - = _mesa_new_z24_renderbuffer_wrapper(ctx, depthRb); + struct gl_renderbuffer *wrapper; + + if (depthRb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + wrapper = _mesa_new_z32f_renderbuffer_wrapper(ctx, depthRb); + } + else { + wrapper = _mesa_new_z24_renderbuffer_wrapper(ctx, depthRb); + } _mesa_reference_renderbuffer(&fb->_DepthBuffer, wrapper); ASSERT(fb->_DepthBuffer->Wrapped == depthRb); } From adea7ea0bc2683c0ee544e20074062df9ae5a72b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 1 Jun 2011 15:49:33 +0200 Subject: [PATCH 044/110] st/mesa: initial ARB_depth_buffer_float support --- src/mesa/state_tracker/st_cb_clear.c | 6 ++++-- src/mesa/state_tracker/st_extensions.c | 11 +++++++++++ src/mesa/state_tracker/st_format.c | 19 +++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 181fedd2b99..117000ba716 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -381,7 +381,8 @@ check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuff assert(rb->Format == MESA_FORMAT_S8 || rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); + rb->Format == MESA_FORMAT_S8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); if (ctx->Scissor.Enabled && (ctx->Scissor.X != 0 || @@ -436,7 +437,8 @@ check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb assert(rb->Format == MESA_FORMAT_S8 || rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); + rb->Format == MESA_FORMAT_S8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); if (maskStencil) return GL_TRUE; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index d3aebe526dd..99b231d9706 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -607,4 +607,15 @@ void st_init_extensions(struct st_context *st) if (screen->get_param(screen, PIPE_CAP_SM3)) { ctx->Extensions.ARB_shader_texture_lod = GL_TRUE; } + + if (screen->is_format_supported(screen, PIPE_FORMAT_Z32_FLOAT, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_SAMPLER_VIEW) && + screen->is_format_supported(screen, PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_SAMPLER_VIEW)) { + ctx->Extensions.ARB_depth_buffer_float = GL_TRUE; + } } diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index d1995f1ee1d..bd4f0860c52 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -95,6 +95,9 @@ st_format_datatype(enum pipe_format format) format == PIPE_FORMAT_X8Z24_UNORM) { return GL_UNSIGNED_INT_24_8; } + else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) { + return GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + } else { const GLuint size = format_max_bits(format); @@ -205,6 +208,10 @@ st_mesa_format_to_pipe_format(gl_format mesaFormat) return PIPE_FORMAT_Z24X8_UNORM; case MESA_FORMAT_S8: return PIPE_FORMAT_S8_USCALED; + case MESA_FORMAT_Z32_FLOAT: + return PIPE_FORMAT_Z32_FLOAT; + case MESA_FORMAT_Z32_FLOAT_X24S8: + return PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED; case MESA_FORMAT_YCBCR: return PIPE_FORMAT_UYVY; #if FEATURE_texture_s3tc @@ -427,6 +434,10 @@ st_pipe_format_to_mesa_format(enum pipe_format format) return MESA_FORMAT_X8_Z24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return MESA_FORMAT_S8_Z24; + case PIPE_FORMAT_Z32_FLOAT: + return MESA_FORMAT_Z32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return MESA_FORMAT_Z32_FLOAT_X24S8; case PIPE_FORMAT_UYVY: return MESA_FORMAT_YCBCR; @@ -784,6 +795,10 @@ static const struct format_mapping format_map[] = { { GL_DEPTH_COMPONENT, 0 }, { DEFAULT_DEPTH_FORMATS } }, + { + { GL_DEPTH_COMPONENT32F, 0 }, + { PIPE_FORMAT_Z32_FLOAT, 0 } + }, /* stencil formats */ { @@ -800,6 +815,10 @@ static const struct format_mapping format_map[] = { { GL_DEPTH_STENCIL_EXT, GL_DEPTH24_STENCIL8_EXT, 0 }, { PIPE_FORMAT_Z24_UNORM_S8_USCALED, PIPE_FORMAT_S8_USCALED_Z24_UNORM, 0 } }, + { + { GL_DEPTH32F_STENCIL8, 0 }, + { PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, 0 } + }, /* sRGB formats */ { From e517e5ac7c89b133f341a1075db95e14d0ba23c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 27 Jun 2011 19:09:24 +0200 Subject: [PATCH 045/110] st/mesa: implement read/draw/copypixels for Z32F and Z32F_S8X24 --- src/mesa/state_tracker/st_cb_drawpixels.c | 64 +++++++++++++++++++---- src/mesa/state_tracker/st_cb_readpixels.c | 43 +++++++++++++++ 2 files changed, 98 insertions(+), 9 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index d61d7ac22be..dca3324645c 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -812,6 +812,7 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, for (row = 0; row < height; row++) { GLubyte sValues[MAX_WIDTH]; GLuint zValues[MAX_WIDTH]; + GLfloat *zValuesFloat = (GLfloat*)zValues; GLenum destType = GL_UNSIGNED_BYTE; const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels, width, height, @@ -822,7 +823,11 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, ctx->_ImageTransferState); if (format == GL_DEPTH_STENCIL) { - _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues, + GLenum ztype = + pt->resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED ? + GL_FLOAT : GL_UNSIGNED_INT; + + _mesa_unpack_depth_span(ctx, spanWidth, ztype, zValues, (1 << 24) - 1, type, source, &clippedUnpack); } @@ -887,6 +892,26 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, } } break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + if (format == GL_DEPTH_STENCIL) { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLfloat *destf = (GLfloat*)dest; + GLint k; + assert(usage == PIPE_TRANSFER_WRITE); + for (k = 0; k < spanWidth; k++) { + destf[k*2] = zValuesFloat[k]; + dest[k*2+1] = sValues[k] & 0xff; + } + } + else { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k*2+1] = sValues[k] & 0xff; + } + } + break; default: assert(0); } @@ -994,14 +1019,23 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, GL_NONE, GL_NONE, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); - if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) - stencil_format = PIPE_FORMAT_X24S8_USCALED; - else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) - stencil_format = PIPE_FORMAT_S8X24_USCALED; - else - stencil_format = PIPE_FORMAT_S8_USCALED; - if (stencil_format == PIPE_FORMAT_NONE) - goto stencil_fallback; + + switch (tex_format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + stencil_format = PIPE_FORMAT_X24S8_USCALED; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + stencil_format = PIPE_FORMAT_S8X24_USCALED; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + stencil_format = PIPE_FORMAT_X32_S8X24_USCALED; + break; + case PIPE_FORMAT_S8_USCALED: + stencil_format = PIPE_FORMAT_S8_USCALED; + break; + default: + goto stencil_fallback; + } } /* Mesa state should be up to date by now */ @@ -1188,6 +1222,18 @@ copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, assert(usage == PIPE_TRANSFER_WRITE); memcpy(dst, src, width); break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + { + uint *dst4 = (uint *) dst; + int j; + dst4++; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (j = 0; j < width; j++) { + *dst4 = src[j] & 0xff; + dst4 += 2; + } + } + break; default: assert(0); } diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 67926e39297..02ddad7b2f0 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -151,6 +151,24 @@ st_read_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, } } break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + if (format == GL_DEPTH_STENCIL) { + const uint *src = (uint *) (stmap + srcY * pt->stride); + const GLfloat *srcf = (const GLfloat*)src; + GLint k; + for (k = 0; k < width; k++) { + zValues[k] = srcf[k*2]; + sValues[k] = src[k*2+1] & 0xff; + } + } + else { + const uint *src = (uint *) (stmap + srcY * pt->stride); + GLint k; + for (k = 0; k < width; k++) { + sValues[k] = src[k*2+1] & 0xff; + } + } + break; default: assert(0); } @@ -568,6 +586,31 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei h dst += dstStride; } } + else if (pformat == PIPE_FORMAT_Z32_FLOAT) { + for (i = 0; i < height; i++) { + GLfloat zfloat[MAX_WIDTH]; + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, zfloat, 0); + y += yStep; + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } + else if (pformat == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) { + assert(format == GL_DEPTH_COMPONENT); + for (i = 0; i < height; i++) { + GLfloat zfloat[MAX_WIDTH]; /* Z32 */ + GLfloat zfloat2[MAX_WIDTH*2]; /* Z32X32 */ + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, zfloat2, 0); + y += yStep; + for (j = 0; j < width; j++) { + zfloat[j] = zfloat2[j*2]; + } + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } else { /* RGBA format */ /* Do a row at a time to flip image data vertically */ From 8ff6f90c3f138fbe922d1eee6cecffc87234ef34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 24 Jun 2011 23:38:36 +0200 Subject: [PATCH 046/110] gallium/util: implement pack functions for Z32F and Z32F_S8X24 --- src/gallium/auxiliary/util/u_pack_color.h | 64 +++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 5378f2d782f..9391f1b80e0 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -458,6 +458,19 @@ util_pack_mask_z(enum pipe_format format, uint32_t z) } } + +static INLINE uint64_t +util_pack64_mask_z(enum pipe_format format, uint32_t z) +{ + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return z; + default: + return util_pack_mask_z(format, z); + } +} + + static INLINE uint32_t util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) { @@ -481,6 +494,21 @@ util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) } +static INLINE uint64_t +util_pack64_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) +{ + uint64_t packed; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + packed = util_pack64_mask_z(format, z); + packed |= (uint64_t)s << 32ull; + return packed; + default: + return util_pack_mask_z_stencil(format, z, s); + } +} + /** * Note: it's assumed that z is in [0,1] @@ -525,6 +553,24 @@ util_pack_z(enum pipe_format format, double z) return 0; } } + + +static INLINE uint64_t +util_pack64_z(enum pipe_format format, double z) +{ + union fi fui; + + if (z == 0) + return 0; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + fui.f = (float)z; + return fui.ui; + default: + return util_pack_z(format, z); + } +} /** @@ -554,6 +600,24 @@ util_pack_z_stencil(enum pipe_format format, double z, uint8_t s) } +static INLINE uint64_t +util_pack64_z_stencil(enum pipe_format format, double z, uint8_t s) +{ + uint64_t packed; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + packed = util_pack64_z(format, z); + packed |= (uint64_t)s << 32ull; + break; + default: + return util_pack_z_stencil(format, z, s); + } + + return packed; +} + + /** * Pack 4 ubytes into a 4-byte word */ From e860cb64dbcb170207641fc280e47858fae74891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 24 Jun 2011 23:39:51 +0200 Subject: [PATCH 047/110] gallium/util: implement software Z32F_S8X24 depth-stencil clear --- src/gallium/auxiliary/util/u_surface.c | 35 +++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 4c5cc4da182..8e123867da6 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -358,8 +358,41 @@ util_clear_depth_stencil(struct pipe_context *pipe, dst_map += dst_stride; } } - break; + break; case 8: + { + uint64_t zstencil = util_pack64_z_stencil(dst->texture->format, + depth, stencil); + + assert(dst->format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED); + + if (!need_rmw) { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst_map; + for (j = 0; j < width; j++) + *row++ = zstencil; + dst_map += dst_stride; + } + } + else { + uint64_t src_mask; + + if (clear_flags & PIPE_CLEAR_DEPTH) + src_mask = 0x00000000ffffffffull; + else + src_mask = 0x000000ff00000000ull; + + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst_map; + for (j = 0; j < width; j++) { + uint64_t tmp = *row & ~src_mask; + *row++ = tmp | (zstencil & src_mask); + } + dst_map += dst_stride; + } + } + break; + } default: assert(0); break; From d9ab6712ccb5a7249feaaf071fefdc78e80808a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 21:14:07 +0200 Subject: [PATCH 048/110] gallium/util: handle Z32F_FLOAT_S8X24_USCALED in pipe_tile_raw_to_rgba And make pipe_put_tile_rgba_format no-op like the other Z formats. --- src/gallium/auxiliary/util/u_tile.c | 35 +++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index e3c7085ba92..23f12e5f464 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -318,6 +318,32 @@ z32f_get_tile_rgba(const float *src, } } +/*** PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z32f_x24s8_get_tile_rgba(const float *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = *src; + src += 2; + } + p += dst_stride; + } +} + void pipe_tile_raw_to_rgba(enum pipe_format format, @@ -352,6 +378,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_Z32_FLOAT: z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + z32f_x24s8_get_tile_rgba((float *) src, w, h, dst, dst_stride); + break; default: util_format_read_4f(format, dst, dst_stride * sizeof(float), @@ -445,6 +474,12 @@ pipe_put_tile_rgba_format(struct pipe_context *pipe, case PIPE_FORMAT_X8Z24_UNORM: /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; + case PIPE_FORMAT_Z32_FLOAT: + /*z32f_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + /*z32f_s8x24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; default: util_format_write_4f(format, p, src_stride * sizeof(float), From 89954723bfeef59d055d2332ff112f0204b48130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 20 Jun 2011 19:40:41 +0200 Subject: [PATCH 049/110] r600g: depth_buffer_float support on r600-r700 --- src/gallium/drivers/r600/r600_state.c | 12 ++++++++++++ src/gallium/drivers/r600/r600_texture.c | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 203b39f855f..01406f2bad6 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -263,6 +263,10 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) return V_028010_DEPTH_X8_24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028010_DEPTH_8_24; + case PIPE_FORMAT_Z32_FLOAT: + return V_028010_DEPTH_32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028010_DEPTH_X24_8_32_FLOAT; default: return ~0U; } @@ -353,6 +357,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16_UNORM: case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: return V_0280A0_SWAP_STD; /* 64-bit buffers. */ @@ -360,6 +365,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -444,7 +450,11 @@ static uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return V_0280A0_COLOR_24_8; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_0280A0_COLOR_X24_8_32_FLOAT; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: return V_0280A0_COLOR_32_FLOAT; case PIPE_FORMAT_R16G16_FLOAT: @@ -532,6 +542,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_32_32_FLOAT: case V_0280A0_COLOR_32_32: + case V_0280A0_COLOR_X24_8_32_FLOAT: return ENDIAN_8IN32; /* 128-bit buffers. */ @@ -635,6 +646,7 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) offset_units *= 2.0f; break; case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: depth = -23; offset_units *= 1.0f; offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 18460419f85..37e75be6cf2 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -851,6 +851,12 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, result = FMT_8; word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); goto out_word4; + case PIPE_FORMAT_Z32_FLOAT: + result = FMT_32_FLOAT; + goto out_word4; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + result = FMT_X24_8_32_FLOAT; + goto out_word4; default: goto out_unknown; } From 3414447011b6b25aeab22f4949a96c09cf4c5098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 29 Jun 2011 02:10:55 +0200 Subject: [PATCH 050/110] docs: update GL3 status --- docs/GL3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 49b48472a4a..135bc4bab67 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -19,7 +19,7 @@ Clamping controls (GL_ARB_color_buffer_float) DONE Float textures, renderbuffers (GL_ARB_texture_float) DONE (gallium r300) GL_EXT_packed_float DONE (gallium r600) GL_EXT_texture_shared_exponent DONE (gallium, swrast) -Float depth buffers (GL_ARB_depth_buffer_float) not started +Float depth buffers (GL_ARB_depth_buffer_float) DONE Framebuffer objects (GL_EXT_framebuffer_object) DONE Half-float DONE Multisample blit DONE From 83478e5d5944e1fc320e8cfb10ba75055bbea3fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 Jul 2011 20:01:33 +0200 Subject: [PATCH 051/110] mesa: return early if mask is cleared to zero in BlitFramebuffer From ARB_framebuffer_object: If a buffer is specified in and does not exist in both the read and draw framebuffers, the corresponding bit is silently ignored. --- src/mesa/main/fbobject.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index d094dd35a69..84969360d92 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2603,6 +2603,10 @@ _mesa_BlitFramebufferEXT(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, } } + if (!mask) { + return; + } + ASSERT(ctx->Driver.BlitFramebuffer); ctx->Driver.BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1, From d1214cca084f277b5acc913490d354edbd4b990f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 10 Jul 2011 20:03:05 +0200 Subject: [PATCH 052/110] swrast: fix depth/stencil blits when there's no colorbuffer NOTE: This is a candidate for the 7.10 and 7.11 branches. --- src/mesa/swrast/s_blit.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mesa/swrast/s_blit.c b/src/mesa/swrast/s_blit.c index 3516a41bf41..7f53f19eb62 100644 --- a/src/mesa/swrast/s_blit.c +++ b/src/mesa/swrast/s_blit.c @@ -568,9 +568,6 @@ _swrast_BlitFramebuffer(struct gl_context *ctx, }; GLint i; - if (!ctx->DrawBuffer->_NumColorDrawBuffers) - return; - if (!_mesa_clip_blit(ctx, &srcX0, &srcY0, &srcX1, &srcY1, &dstX0, &dstY0, &dstX1, &dstY1)) { return; From 91a52dae97379d118965567b5c11e393996baeb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 21 Jun 2011 23:22:37 +0200 Subject: [PATCH 053/110] WIP r600g: depth_buffer_float renderbuffer support on evergreen --- src/gallium/drivers/r600/evergreen_state.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index fbf25feaf20..acc591f2d6e 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -265,6 +265,9 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) return V_028040_Z_24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028040_Z_24; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028040_Z_32_FLOAT; default: return ~0U; } @@ -272,7 +275,8 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) static uint32_t r600_translate_stencilformat(enum pipe_format format) { - if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || + format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) return 1; else return 0; @@ -360,6 +364,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R11G11B10_FLOAT: case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; @@ -369,6 +374,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -453,7 +459,11 @@ static uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return V_028C70_COLOR_24_8; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028C70_COLOR_X24_8_32_FLOAT; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: return V_028C70_COLOR_32_FLOAT; case PIPE_FORMAT_R16G16_FLOAT: @@ -541,6 +551,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_32_FLOAT: case V_028C70_COLOR_32_32: + case V_028C70_COLOR_X24_8_32_FLOAT: return ENDIAN_8IN32; /* 96-bit buffers. */ @@ -2123,6 +2134,7 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) offset_units *= 2.0f; break; case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: depth = -23; offset_units *= 1.0f; offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); From 01f48a979d85525acd060c8055ec835a1b56ea87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 01:39:53 +0200 Subject: [PATCH 054/110] mesa: implement packing of DEPTH_STENCIL & FLOAT_32_UNSIGNED_INT_24_8_REV combo Tested with the new piglit fbo-depthstencil test. --- src/mesa/main/pack.c | 21 ++++++++++++++++----- src/mesa/main/pack.h | 4 ++-- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/swrast/s_readpix.c | 2 +- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index d42ae7bf0f4..7de1d05b919 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -5056,10 +5056,11 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest, /** - * Pack depth and stencil values as GL_DEPTH_STENCIL/GL_UNSIGNED_INT_24_8. + * Pack depth and stencil values as GL_DEPTH_STENCIL (GL_UNSIGNED_INT_24_8 etc) */ void -_mesa_pack_depth_stencil_span(struct gl_context *ctx, GLuint n, GLuint *dest, +_mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n, + GLenum dstType, GLuint *dest, const GLfloat *depthVals, const GLstencil *stencilVals, const struct gl_pixelstore_attrib *dstPacking) @@ -5089,9 +5090,19 @@ _mesa_pack_depth_stencil_span(struct gl_context *ctx, GLuint n, GLuint *dest, stencilVals = stencilCopy; } - for (i = 0; i < n; i++) { - GLuint z = (GLuint) (depthVals[i] * 0xffffff); - dest[i] = (z << 8) | (stencilVals[i] & 0xff); + switch (dstType) { + case GL_UNSIGNED_INT_24_8: + for (i = 0; i < n; i++) { + GLuint z = (GLuint) (depthVals[i] * 0xffffff); + dest[i] = (z << 8) | (stencilVals[i] & 0xff); + } + break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + for (i = 0; i < n; i++) { + ((GLfloat*)dest)[i*2] = depthVals[i]; + dest[i*2+1] = stencilVals[i] & 0xff; + } + break; } if (dstPacking->SwapBytes) { diff --git a/src/mesa/main/pack.h b/src/mesa/main/pack.h index 78238ea5839..00aab409e42 100644 --- a/src/mesa/main/pack.h +++ b/src/mesa/main/pack.h @@ -130,8 +130,8 @@ _mesa_pack_depth_span(struct gl_context *ctx, GLuint n, GLvoid *dest, extern void -_mesa_pack_depth_stencil_span(struct gl_context *ctx, - GLuint n, GLuint *dest, +_mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n, + GLenum dstType, GLuint *dest, const GLfloat *depthVals, const GLstencil *stencilVals, const struct gl_pixelstore_attrib *dstPacking); diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 02ddad7b2f0..e2b29fe3068 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -177,7 +177,7 @@ st_read_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, dest = _mesa_image_address2d(packing, pixels, width, height, format, type, j, 0); if (format == GL_DEPTH_STENCIL) { - _mesa_pack_depth_stencil_span(ctx, width, dest, + _mesa_pack_depth_stencil_span(ctx, width, type, dest, zValues, sValues, packing); } else { diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c index 214f2ea1aaa..66ca39293a6 100644 --- a/src/mesa/swrast/s_readpix.c +++ b/src/mesa/swrast/s_readpix.c @@ -446,7 +446,7 @@ read_depth_stencil_pixels(struct gl_context *ctx, GLfloat depthVals[MAX_WIDTH]; _swrast_read_depth_span_float(ctx, depthRb, width, x, y + i, depthVals); - _mesa_pack_depth_stencil_span(ctx, width, depthStencilDst, + _mesa_pack_depth_stencil_span(ctx, width, type, depthStencilDst, depthVals, stencilVals, packing); } } From daf6604435594b2ec861a40eaf4c5a23c97c0714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 04:01:06 +0200 Subject: [PATCH 055/110] r600g: zero memory of ioctl parameters Fixes valgrind warning. --- src/gallium/winsys/r600/drm/r600_drm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 4602f7f2a4b..fa2d77ed464 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -186,7 +186,7 @@ static int eg_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) static int radeon_drm_get_tiling(struct radeon *radeon) { - struct drm_radeon_info info; + struct drm_radeon_info info = {}; int r; uint32_t tiling_config = 0; @@ -208,7 +208,7 @@ static int radeon_drm_get_tiling(struct radeon *radeon) static int radeon_get_clock_crystal_freq(struct radeon *radeon) { - struct drm_radeon_info info; + struct drm_radeon_info info = {}; uint32_t clock_crystal_freq; int r; @@ -226,7 +226,7 @@ static int radeon_get_clock_crystal_freq(struct radeon *radeon) static int radeon_get_num_backends(struct radeon *radeon) { - struct drm_radeon_info info; + struct drm_radeon_info info = {}; uint32_t num_backends; int r; From dc9d789d1b39f9702c179a1d60f76535352563df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 04:13:35 +0200 Subject: [PATCH 056/110] r600g: more valgrind fixes --- src/gallium/winsys/r600/drm/r600_drm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index fa2d77ed464..b5a4d928bf5 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -209,7 +209,7 @@ static int radeon_drm_get_tiling(struct radeon *radeon) static int radeon_get_clock_crystal_freq(struct radeon *radeon) { struct drm_radeon_info info = {}; - uint32_t clock_crystal_freq; + uint32_t clock_crystal_freq = 0; int r; info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ; @@ -227,7 +227,7 @@ static int radeon_get_clock_crystal_freq(struct radeon *radeon) static int radeon_get_num_backends(struct radeon *radeon) { struct drm_radeon_info info = {}; - uint32_t num_backends; + uint32_t num_backends = 0; int r; info.request = RADEON_INFO_NUM_BACKENDS; From 12265d26ddc72f62de927ac24e12ab41fcd8d1c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 04:17:37 +0200 Subject: [PATCH 057/110] Revert "WIP r600g: depth_buffer_float renderbuffer support on evergreen" This reverts commit 91a52dae97379d118965567b5c11e393996baeb9. Pushed accidentally. --- src/gallium/drivers/r600/evergreen_state.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index acc591f2d6e..fbf25feaf20 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -265,9 +265,6 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) return V_028040_Z_24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028040_Z_24; - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: - return V_028040_Z_32_FLOAT; default: return ~0U; } @@ -275,8 +272,7 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) static uint32_t r600_translate_stencilformat(enum pipe_format format) { - if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || - format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) + if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) return 1; else return 0; @@ -364,7 +360,6 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R11G11B10_FLOAT: case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; @@ -374,7 +369,6 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -459,11 +453,7 @@ static uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return V_028C70_COLOR_24_8; - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: - return V_028C70_COLOR_X24_8_32_FLOAT; - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT: return V_028C70_COLOR_32_FLOAT; case PIPE_FORMAT_R16G16_FLOAT: @@ -551,7 +541,6 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_32_FLOAT: case V_028C70_COLOR_32_32: - case V_028C70_COLOR_X24_8_32_FLOAT: return ENDIAN_8IN32; /* 96-bit buffers. */ @@ -2134,7 +2123,6 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) offset_units *= 2.0f; break; case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: depth = -23; offset_units *= 1.0f; offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); From e134eaa2c5619b47e944e33d053ee23c61da7aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 13:53:42 +0200 Subject: [PATCH 058/110] mesa: fix assertion failure in _mesa_test_formats Z32_FLOAT_X24S8 has DataType of GL_NONE. --- src/mesa/main/formats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index f58b1975672..e01ea11df97 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -1485,7 +1485,8 @@ _mesa_test_formats(void) info->DataType == GL_SIGNED_NORMALIZED || info->DataType == GL_UNSIGNED_INT || info->DataType == GL_INT || - info->DataType == GL_FLOAT); + info->DataType == GL_FLOAT || + info->DataType == GL_NONE); if (info->BaseFormat == GL_RGB) { assert(info->RedBits > 0); From ceb04b32baf13bbd62cf80496e5751612287ef15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 15:18:14 +0200 Subject: [PATCH 059/110] mesa: add a comment in _mesa_test_formats --- src/mesa/main/formats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index e01ea11df97..f9298d2d1e9 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -1486,6 +1486,7 @@ _mesa_test_formats(void) info->DataType == GL_UNSIGNED_INT || info->DataType == GL_INT || info->DataType == GL_FLOAT || + /* Z32_FLOAT_X24S8 has DataType of GL_NONE */ info->DataType == GL_NONE); if (info->BaseFormat == GL_RGB) { From 6c53d75e7d41e7c586495e0486d46fbffaa77a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 15:26:51 +0200 Subject: [PATCH 060/110] mesa: fix assertion failure in delete_wrapper --- src/mesa/main/depthstencil.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/depthstencil.c b/src/mesa/main/depthstencil.c index 4d0600050ff..40d6c9612a2 100644 --- a/src/mesa/main/depthstencil.c +++ b/src/mesa/main/depthstencil.c @@ -63,7 +63,8 @@ static void delete_wrapper(struct gl_renderbuffer *rb) { ASSERT(rb->Format == MESA_FORMAT_S8 || - rb->Format == MESA_FORMAT_X8_Z24); + rb->Format == MESA_FORMAT_X8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT); _mesa_reference_renderbuffer(&rb->Wrapped, NULL); free(rb); } From bb0d5cae002f288f822dc9c90e6cd4eaf660c464 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jul 2011 12:59:20 -0600 Subject: [PATCH 061/110] glext.h: update to version 71 --- include/GL/glext.h | 84 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 3 deletions(-) diff --git a/include/GL/glext.h b/include/GL/glext.h index 6e5e6a11180..9048515c6d9 100644 --- a/include/GL/glext.h +++ b/include/GL/glext.h @@ -6,7 +6,7 @@ extern "C" { #endif /* -** Copyright (c) 2007-2010 The Khronos Group Inc. +** Copyright (c) 2007-2011 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -29,9 +29,9 @@ extern "C" { */ /* Header file version number, required by OpenGL ABI for Linux */ -/* glext.h last updated $Date: 2010-12-09 02:15:08 -0800 (Thu, 09 Dec 2010) $ */ +/* glext.h last updated $Date: 2011-07-06 02:49:14 -0700 (Wed, 06 Jul 2011) $ */ /* Current version at http://www.opengl.org/registry/ */ -#define GL_GLEXT_VERSION 67 +#define GL_GLEXT_VERSION 71 /* Function declaration macros - to move into glplatform.h */ #if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) @@ -5032,6 +5032,32 @@ extern "C" { #define GL_SKIP_DECODE_EXT 0x8A4A #endif +#ifndef GL_NV_texture_multisample +#define GL_TEXTURE_COVERAGE_SAMPLES_NV 0x9045 +#define GL_TEXTURE_COLOR_SAMPLES_NV 0x9046 +#endif + +#ifndef GL_AMD_blend_minmax_factor +#define GL_FACTOR_MIN_AMD 0x901C +#define GL_FACTOR_MAX_AMD 0x901D +#endif + +#ifndef GL_AMD_sample_positions +#define GL_SUBSAMPLE_DISTANCE_AMD 0x883F +#endif + +#ifndef GL_EXT_x11_sync_object +#define GL_SYNC_X11_FENCE_EXT 0x90E1 +#endif + +#ifndef GL_AMD_multi_draw_indirect +#endif + +#ifndef GL_EXT_framebuffer_multisample_blit_scaled +#define GL_SCALED_RESOLVE_FASTEST_EXT 0x90BA +#define GL_SCALED_RESOLVE_NICEST_EXT 0x90BB +#endif + /*************************************************************/ @@ -11041,6 +11067,58 @@ typedef void (APIENTRYP PFNGLVDPAUUNMAPSURFACESNVPROC) (GLsizei numSurface, cons #define GL_EXT_texture_sRGB_decode 1 #endif +#ifndef GL_NV_texture_multisample +#define GL_NV_texture_multisample 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glTexImage2DMultisampleCoverageNV (GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +GLAPI void APIENTRY glTexImage3DMultisampleCoverageNV (GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +GLAPI void APIENTRY glTextureImage2DMultisampleNV (GLuint texture, GLenum target, GLsizei samples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +GLAPI void APIENTRY glTextureImage3DMultisampleNV (GLuint texture, GLenum target, GLsizei samples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +GLAPI void APIENTRY glTextureImage2DMultisampleCoverageNV (GLuint texture, GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +GLAPI void APIENTRY glTextureImage3DMultisampleCoverageNV (GLuint texture, GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLTEXIMAGE2DMULTISAMPLECOVERAGENVPROC) (GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +typedef void (APIENTRYP PFNGLTEXIMAGE3DMULTISAMPLECOVERAGENVPROC) (GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +typedef void (APIENTRYP PFNGLTEXTUREIMAGE2DMULTISAMPLENVPROC) (GLuint texture, GLenum target, GLsizei samples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +typedef void (APIENTRYP PFNGLTEXTUREIMAGE3DMULTISAMPLENVPROC) (GLuint texture, GLenum target, GLsizei samples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +typedef void (APIENTRYP PFNGLTEXTUREIMAGE2DMULTISAMPLECOVERAGENVPROC) (GLuint texture, GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +typedef void (APIENTRYP PFNGLTEXTUREIMAGE3DMULTISAMPLECOVERAGENVPROC) (GLuint texture, GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +#endif + +#ifndef GL_AMD_blend_minmax_factor +#define GL_AMD_blend_minmax_factor 1 +#endif + +#ifndef GL_AMD_sample_positions +#define GL_AMD_sample_positions 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glSetMultisamplefvAMD (GLenum pname, GLuint index, const GLfloat *val); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLSETMULTISAMPLEFVAMDPROC) (GLenum pname, GLuint index, const GLfloat *val); +#endif + +#ifndef GL_EXT_x11_sync_object +#define GL_EXT_x11_sync_object 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI GLsync APIENTRY glImportSyncEXT (GLenum external_sync_type, GLintptr external_sync, GLbitfield flags); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef GLsync (APIENTRYP PFNGLIMPORTSYNCEXTPROC) (GLenum external_sync_type, GLintptr external_sync, GLbitfield flags); +#endif + +#ifndef GL_AMD_multi_draw_indirect +#define GL_AMD_multi_draw_indirect 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glMultiDrawArraysIndirectAMD (GLenum mode, const GLvoid *indirect, GLsizei primcount, GLsizei stride); +GLAPI void APIENTRY glMultiDrawElementsIndirectAMD (GLenum mode, GLenum type, const GLvoid *indirect, GLsizei primcount, GLsizei stride); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTAMDPROC) (GLenum mode, const GLvoid *indirect, GLsizei primcount, GLsizei stride); +typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTAMDPROC) (GLenum mode, GLenum type, const GLvoid *indirect, GLsizei primcount, GLsizei stride); +#endif + +#ifndef GL_EXT_framebuffer_multisample_blit_scaled +#define GL_EXT_framebuffer_multisample_blit_scaled 1 +#endif + #ifdef __cplusplus } From d60880db35fd11d9348ce4b2bfbcc9325d2ebf91 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jul 2011 08:00:59 -0600 Subject: [PATCH 062/110] glx: add a few missing glXChooseFBConfig() attributes Fixes https://bugs.freedesktop.org/show_bug.cgi?id=38842 NOTE: This is a candidate for the 7.11 branch. --- src/gallium/state_trackers/glx/xlib/glx_api.c | 9 ++++++--- src/mesa/drivers/x11/fakeglx.c | 4 ++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c index 6233fb81781..a7aafd846cd 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_api.c +++ b/src/gallium/state_trackers/glx/xlib/glx_api.c @@ -864,16 +864,19 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) parselist++; break; case GLX_FBCONFIG_ID: + case GLX_VISUAL_ID: if (!fbConfig) return NULL; parselist++; desiredVisualID = *parselist++; break; case GLX_X_RENDERABLE: + case GLX_MAX_PBUFFER_WIDTH: + case GLX_MAX_PBUFFER_HEIGHT: + case GLX_MAX_PBUFFER_PIXELS: if (!fbConfig) - return NULL; - parselist += 2; - /* ignore */ + return NULL; /* invalid config option */ + parselist += 2; /* ignore the parameter */ break; #ifdef GLX_EXT_texture_from_pixmap diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 48657b44be1..1f5fc33d775 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -1097,12 +1097,16 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) parselist++; break; case GLX_FBCONFIG_ID: + case GLX_VISUAL_ID: if (!fbConfig) return NULL; parselist++; desiredVisualID = *parselist++; break; case GLX_X_RENDERABLE: + case GLX_MAX_PBUFFER_WIDTH: + case GLX_MAX_PBUFFER_HEIGHT: + case GLX_MAX_PBUFFER_PIXELS: if (!fbConfig) return NULL; parselist += 2; From acf82194ce4383308d1e8939d1778594bf5547c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Mon, 11 Jul 2011 15:36:40 +0100 Subject: [PATCH 063/110] scons: Filter-out NDEBUG define from llvm-config. Based on a similar autoconf change from Marek. --- scons/llvm.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scons/llvm.py b/scons/llvm.py index b89899bbf87..66f972df5fb 100644 --- a/scons/llvm.py +++ b/scons/llvm.py @@ -141,7 +141,15 @@ def generate(env): llvm_version = distutils.version.LooseVersion(llvm_version) try: - env.ParseConfig('llvm-config --cppflags') + # Treat --cppflags specially to prevent NDEBUG from disabling + # assertion failures in debug builds. + cppflags = env.ParseFlags('!llvm-config --cppflags') + try: + cppflags['CPPDEFINES'].remove('NDEBUG') + except ValueError: + pass + env.MergeFlags(cppflags) + env.ParseConfig('llvm-config --libs') env.ParseConfig('llvm-config --ldflags') except OSError: From be039d296d2deeb6dbdc4cdb85c203051488482c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 14:33:21 +0200 Subject: [PATCH 064/110] configure.ac: do not let llvm-config define NDEBUG in debug builds Re-enables assertions in src/mesa. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index dde13c9b57d..6a5bb37e6cd 100644 --- a/configure.ac +++ b/configure.ac @@ -1806,7 +1806,7 @@ fi if test "x$enable_gallium_llvm" = xyes; then if test "x$LLVM_CONFIG" != xno; then LLVM_VERSION=`$LLVM_CONFIG --version` - LLVM_CFLAGS=`$LLVM_CONFIG --cppflags` + LLVM_CFLAGS=`$LLVM_CONFIG --cppflags|sed 's/-DNDEBUG\>//g'` LLVM_LIBS="`$LLVM_CONFIG --libs` -lstdc++" LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags` From e3ea5bc08e32119d05bce543c07c61ce93869e60 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 7 Jul 2011 16:41:20 -0700 Subject: [PATCH 065/110] i965: Fix fp-lit-src-equals-dst. We were stomping over the source for the body of the LIT instruction when doing the MOV of 1.0 to the uninteresting channels. Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_wm_fp.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 59dcda7b414..87eabf1fba1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -611,7 +611,17 @@ static void precalc_lit( struct brw_wm_compile *c, { struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register dst = inst->DstReg; - + + if (dst.WriteMask & WRITEMASK_YZ) { + emit_op(c, + OPCODE_LIT, + dst_mask(dst, WRITEMASK_YZ), + inst->SaturateMode, + src0, + src_undef(), + src_undef()); + } + if (dst.WriteMask & WRITEMASK_XW) { struct prog_instruction *swz; @@ -627,16 +637,6 @@ static void precalc_lit( struct brw_wm_compile *c, /* Avoid letting the negation flag of src0 affect our 1 constant. */ swz->SrcReg[0].Negate = NEGATE_NONE; } - - if (dst.WriteMask & WRITEMASK_YZ) { - emit_op(c, - OPCODE_LIT, - dst_mask(dst, WRITEMASK_YZ), - inst->SaturateMode, - src0, - src_undef(), - src_undef()); - } } From 46a7639174d2c55c30ec24b179cbef059fb3ca43 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 7 Jul 2011 17:08:04 -0700 Subject: [PATCH 066/110] i965: Fix fp-dst-aliasing-[12].vpfp. There's no pretty way to avoid the overwriting of the src operands, so just use a temporary destination and rely on the MOV optimization. Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_wm_fp.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 87eabf1fba1..7cd3edad235 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -563,13 +563,14 @@ static void precalc_dst( struct brw_wm_compile *c, struct prog_src_register src0 = inst->SrcReg[0]; struct prog_src_register src1 = inst->SrcReg[1]; struct prog_dst_register dst = inst->DstReg; - + struct prog_dst_register temp = get_temp(c); + if (dst.WriteMask & WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op(c, OPCODE_MUL, - dst_mask(dst, WRITEMASK_Y), + dst_mask(temp, WRITEMASK_Y), inst->SaturateMode, src0, src1, @@ -584,7 +585,7 @@ static void precalc_dst( struct brw_wm_compile *c, */ swz = emit_op(c, OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XZ), + dst_mask(temp, WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), @@ -597,12 +598,26 @@ static void precalc_dst( struct brw_wm_compile *c, */ emit_op(c, OPCODE_MOV, - dst_mask(dst, WRITEMASK_W), + dst_mask(temp, WRITEMASK_W), inst->SaturateMode, src1, src_undef(), src_undef()); } + + /* This will get optimized out in general, but it ensures that we + * don't overwrite src operands in our channel-wise splitting + * above. See piglit fp-dst-aliasing-[12]. + */ + emit_op(c, + OPCODE_MOV, + dst, + 0, + src_reg_from_dst(temp), + src_undef(), + src_undef()); + + release_temp(c, temp); } From a166720f2d93b090ad1c383c9bd90d0a9e27760d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 2 May 2011 10:45:02 -0700 Subject: [PATCH 067/110] ir_to_mesa: typo fix in a comment. --- src/mesa/program/ir_to_mesa.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 738e97ca55c..d8e5a3a9772 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1415,9 +1415,9 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir) case ir_var_in: case ir_var_inout: /* The linker assigns locations for varyings and attributes, - * including deprecated builtins (like gl_Color), user-assign - * generic attributes (glBindVertexLocation), and - * user-defined varyings. + * including deprecated builtins (like gl_Color), + * user-assigned generic attributes (glBindVertexLocation), + * and user-defined varyings. * * FINISHME: We would hit this path for function arguments. Fix! */ From be8551220cc32736165e081bafb2fa0a45a39993 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jul 2011 10:07:32 -0600 Subject: [PATCH 068/110] configs: do not let llvm-config define NDEBUG in debug builds Following the examples of Marek and Jose for autoconf and scons. --- configs/linux-llvm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/linux-llvm b/configs/linux-llvm index 54d82b5376c..ef6c7bb105a 100644 --- a/configs/linux-llvm +++ b/configs/linux-llvm @@ -30,7 +30,7 @@ else endif ifeq ($(MESA_LLVM),1) - LLVM_CFLAGS=`llvm-config --cppflags` + LLVM_CFLAGS=`llvm-config --cppflags|sed 's/-DNDEBUG\>//g'` LLVM_CXXFLAGS=`llvm-config --cxxflags` -Wno-long-long LLVM_LDFLAGS = $(shell llvm-config --ldflags) LLVM_LIBS = $(shell llvm-config --libs) From 35d5d5df72a2747262e00e521e650c8974d6c64d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 5 May 2011 19:01:25 -0700 Subject: [PATCH 069/110] intel: Make our context structure be a ralloc context. This will let me hang cached compiler structs off of the context without having to worry about cleaning them up at destroy time. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i915/i830_context.c | 3 ++- src/mesa/drivers/dri/i915/i915_context.c | 4 ++-- src/mesa/drivers/dri/i965/brw_context.c | 3 ++- src/mesa/drivers/dri/intel/intel_context.c | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index abfb32be3ae..d22118beb0b 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -33,6 +33,7 @@ #include "tnl/t_pipeline.h" #include "intel_span.h" #include "intel_tris.h" +#include "../glsl/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -53,7 +54,7 @@ i830CreateContext(const struct gl_config * mesaVis, void *sharedContextPrivate) { struct dd_function_table functions; - struct i830_context *i830 = CALLOC_STRUCT(i830_context); + struct i830_context *i830 = rzalloc(NULL, struct i830_context); struct intel_context *intel = &i830->intel; struct gl_context *ctx = &intel->ctx; if (!i830) diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index f02f2d78267..11bee140ab6 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -36,6 +36,7 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" +#include "../glsl/ralloc.h" #include "i915_reg.h" #include "i915_program.h" @@ -97,8 +98,7 @@ i915CreateContext(int api, void *sharedContextPrivate) { struct dd_function_table functions; - struct i915_context *i915 = - (struct i915_context *) CALLOC_STRUCT(i915_context); + struct i915_context *i915 = rzalloc(NULL, struct i915_context); struct intel_context *intel = &i915->intel; struct gl_context *ctx = &intel->ctx; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 636821839a1..ac683bd9960 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -40,6 +40,7 @@ #include "brw_state.h" #include "intel_span.h" #include "tnl/t_pipeline.h" +#include "../glsl/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -59,7 +60,7 @@ GLboolean brwCreateContext( int api, void *sharedContextPrivate) { struct dd_function_table functions; - struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); + struct brw_context *brw = rzalloc(NULL, struct brw_context); struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; unsigned i; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 292b7b034ee..2ba13632569 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -56,7 +56,7 @@ #include "drirenderbuffer.h" #include "utils.h" - +#include "../glsl/ralloc.h" #ifndef INTEL_DEBUG int INTEL_DEBUG = (0); @@ -924,7 +924,7 @@ intelDestroyContext(__DRIcontext * driContextPriv) _math_matrix_dtr(&intel->ViewportMatrix); - FREE(intel); + ralloc_free(intel); driContextPriv->driverPrivate = NULL; } } From d375df220fae47f38944c4832bcbd5f5d568884c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jun 2011 15:14:20 -0700 Subject: [PATCH 070/110] i965: Add a type argument to brw_state_batch(). I want to make brw_state_dump.c handle more than just the last statechange, so I want to keep track of what's in the batch state. By using AUB file numbering for most of these packets, this may be reusable for aub dumping. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_cc.c | 6 +++-- src/mesa/drivers/dri/i965/brw_clip_state.c | 3 ++- src/mesa/drivers/dri/i965/brw_context.h | 24 +++++++++++++++++++ src/mesa/drivers/dri/i965/brw_gs_state.c | 3 ++- src/mesa/drivers/dri/i965/brw_sf_state.c | 6 +++-- src/mesa/drivers/dri/i965/brw_state.h | 1 + src/mesa/drivers/dri/i965/brw_state_batch.c | 1 + src/mesa/drivers/dri/i965/brw_vs_state.c | 3 ++- .../drivers/dri/i965/brw_vs_surface_state.c | 3 ++- .../drivers/dri/i965/brw_wm_sampler_state.c | 9 ++++--- src/mesa/drivers/dri/i965/brw_wm_state.c | 3 ++- .../drivers/dri/i965/brw_wm_surface_state.c | 15 ++++++++---- src/mesa/drivers/dri/i965/gen6_cc.c | 6 +++-- src/mesa/drivers/dri/i965/gen6_depthstencil.c | 3 ++- .../drivers/dri/i965/gen6_scissor_state.c | 3 ++- .../drivers/dri/i965/gen6_viewport_state.c | 6 +++-- src/mesa/drivers/dri/i965/gen6_vs_state.c | 2 +- src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +- .../drivers/dri/i965/gen7_sampler_state.c | 3 ++- .../drivers/dri/i965/gen7_viewport_state.c | 3 ++- src/mesa/drivers/dri/i965/gen7_wm_state.c | 2 +- .../drivers/dri/i965/gen7_wm_surface_state.c | 15 ++++++------ 22 files changed, 87 insertions(+), 35 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 94b8c20b019..9c26150d241 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -43,7 +43,8 @@ prepare_cc_vp(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_cc_viewport *ccv; - ccv = brw_state_batch(brw, sizeof(*ccv), 32, &brw->cc.vp_offset); + ccv = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, + sizeof(*ccv), 32, &brw->cc.vp_offset); /* _NEW_TRANSOFORM */ if (ctx->Transform.DepthClamp) { @@ -98,7 +99,8 @@ static void upload_cc_unit(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_cc_unit_state *cc; - cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + cc = brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(*cc), 64, &brw->cc.state_offset); memset(cc, 0, sizeof(*cc)); /* _NEW_STENCIL */ diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index b9efbb74c87..31fbadf5ef2 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -40,7 +40,8 @@ brw_prepare_clip_unit(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_clip_unit_state *clip; - clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); + clip = brw_state_batch(brw, AUB_TRACE_CLIP_STATE, + sizeof(*clip), 32, &brw->clip.state_offset); memset(clip, 0, sizeof(*clip)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a8e2b802803..012617b1d2f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -188,6 +188,30 @@ struct brw_state_flags { GLuint cache; }; +enum state_struct_type { + AUB_TRACE_VS_STATE = 1, + AUB_TRACE_GS_STATE = 2, + AUB_TRACE_CLIP_STATE = 3, + AUB_TRACE_SF_STATE = 4, + AUB_TRACE_WM_STATE = 5, + AUB_TRACE_CC_STATE = 6, + AUB_TRACE_CLIP_VP_STATE = 7, + AUB_TRACE_SF_VP_STATE = 8, + AUB_TRACE_CC_VP_STATE = 0x9, + AUB_TRACE_SAMPLER_STATE = 0xa, + AUB_TRACE_KERNEL_INSTRUCTIONS = 0xb, + AUB_TRACE_SCRATCH_SPACE = 0xc, + AUB_TRACE_SAMPLER_DEFAULT_COLOR = 0xd, + + AUB_TRACE_SCISSOR_STATE = 0x15, + AUB_TRACE_BLEND_STATE = 0x16, + AUB_TRACE_DEPTH_STENCIL_STATE = 0x17, + + /* Not written to .aub files the same way the structures above are. */ + AUB_TRACE_NO_TYPE = 0x100, + AUB_TRACE_BINDING_TABLE = 0x101, + AUB_TRACE_SURFACE_STATE = 0x102, +}; /** Subclass of Mesa vertex program */ struct brw_vertex_program { diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index bbfefcd816a..e0309e71fc3 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -41,7 +41,8 @@ brw_prepare_gs_unit(struct brw_context *brw) struct intel_context *intel = &brw->intel; struct brw_gs_unit_state *gs; - gs = brw_state_batch(brw, sizeof(*gs), 32, &brw->gs.state_offset); + gs = brw_state_batch(brw, AUB_TRACE_GS_STATE, + sizeof(*gs), 32, &brw->gs.state_offset); memset(gs, 0, sizeof(*gs)); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index eb3d103099b..9201be7caab 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -46,7 +46,8 @@ static void upload_sf_vp(struct brw_context *brw) const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); if (render_to_fbo) { @@ -129,7 +130,8 @@ static void upload_sf_unit( struct brw_context *brw ) int chipset_max_threads; bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; - sf = brw_state_batch(brw, sizeof(*sf), 64, &brw->sf.state_offset); + sf = brw_state_batch(brw, AUB_TRACE_SF_STATE, + sizeof(*sf), 64, &brw->sf.state_offset); memset(sf, 0, sizeof(*sf)); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b384651d8d0..cede4e5c916 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -172,6 +172,7 @@ void brw_destroy_caches( struct brw_context *brw ); sizeof(*(s)), false) void *brw_state_batch(struct brw_context *brw, + enum state_struct_type type, int size, int alignment, uint32_t *out_offset); diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 213c7a38d8c..32f315ef107 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -49,6 +49,7 @@ */ void * brw_state_batch(struct brw_context *brw, + enum state_struct_type type, int size, int alignment, uint32_t *out_offset) diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 179ca199b45..fc4373ab311 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -43,7 +43,8 @@ brw_prepare_vs_unit(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_vs_unit_state *vs; - vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset); + vs = brw_state_batch(brw, AUB_TRACE_VS_STATE, + sizeof(*vs), 32, &brw->vs.state_offset); memset(vs, 0, sizeof(*vs)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 611f6333689..f9ee4d112a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -182,7 +182,8 @@ static void upload_vs_surfaces(struct brw_context *brw) /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. (once we have vs samplers) */ - bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF, + bind = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(uint32_t) * BRW_VS_MAX_SURF, 32, &brw->vs.bind_bo_offset); for (i = 0; i < BRW_VS_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 5de39aa4575..98146136703 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -108,7 +108,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, if (intel->gen == 5 || intel->gen == 6) { struct gen5_sampler_default_color *sdc; - sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); memset(sdc, 0, sizeof(*sdc)); @@ -144,7 +145,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, } else { struct brw_sampler_default_color *sdc; - sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); COPY_4V(sdc->color, color); } @@ -326,7 +328,8 @@ prepare_wm_samplers(struct brw_context *brw) if (brw->wm.sampler_count == 0) return; - samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->wm.sampler_offset); memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 506e2bdff5b..c820ce48c29 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -78,7 +78,8 @@ brw_prepare_wm_unit(struct brw_context *brw) const struct gl_fragment_program *fp = brw->fragment_program; struct brw_wm_unit_state *wm; - wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.state_offset); + wm = brw_state_batch(brw, AUB_TRACE_WM_STATE, + sizeof(*wm), 32, &brw->wm.state_offset); memset(wm, 0, sizeof(*wm)); if (brw->wm.prog_data->prog_offset_16) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 89fea9cc952..fb4fb146f8d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -226,7 +226,8 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) const GLuint surf_index = SURF_INDEX_TEXTURE(unit); uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[surf_index]); surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -272,7 +273,8 @@ brw_create_constant_surface(struct brw_context *brw, const GLint w = width - 1; uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, out_offset); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, out_offset); surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -404,7 +406,8 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) struct intel_context *intel = &brw->intel; uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); @@ -439,7 +442,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, uint32_t tile_x, tile_y; uint32_t format = 0; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); switch (irb->Base.Format) { case MESA_FORMAT_XRGB8888: @@ -637,7 +641,8 @@ brw_wm_upload_binding_table(struct brw_context *brw) /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. */ - bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF, + bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, + sizeof(uint32_t) * BRW_WM_MAX_SURF, 32, &brw->wm.bind_bo_offset); for (i = 0; i < BRW_WM_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 294d5a5e644..41d13ad2bf4 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -51,7 +51,8 @@ prepare_blend_state(struct brw_context *brw) nr_draw_buffers = 1; size = sizeof(*blend) * nr_draw_buffers; - blend = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset); + blend = brw_state_batch(brw, AUB_TRACE_BLEND_STATE, + size, 64, &brw->cc.blend_state_offset); memset(blend, 0, size); @@ -139,7 +140,8 @@ gen6_prepare_color_calc_state(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct gen6_color_calc_state *cc; - cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + cc = brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(*cc), 64, &brw->cc.state_offset); memset(cc, 0, sizeof(*cc)); /* _NEW_COLOR */ diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c index 775e1ce2c9c..5d14147db3d 100644 --- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -34,7 +34,8 @@ gen6_prepare_depth_stencil_state(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct gen6_depth_stencil_state *ds; - ds = brw_state_batch(brw, sizeof(*ds), 64, + ds = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*ds), 64, &brw->cc.depth_stencil_state_offset); memset(ds, 0, sizeof(*ds)); diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index 7492e508864..dc73b10f4cd 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -39,7 +39,8 @@ gen6_upload_scissor_state(struct brw_context *brw) struct gen6_scissor_rect *scissor; uint32_t scissor_state_offset; - scissor = brw_state_batch(brw, sizeof(*scissor), 32, &scissor_state_offset); + scissor = brw_state_batch(brw, AUB_TRACE_SCISSOR_STATE, + sizeof(*scissor), 32, &scissor_state_offset); /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index c6c55c926c7..a4bfa54837d 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -43,7 +43,8 @@ prepare_clip_vp(struct brw_context *brw) { struct brw_clipper_viewport *vp; - vp = brw_state_batch(brw, sizeof(*vp), 32, &brw->clip.vp_offset); + vp = brw_state_batch(brw, AUB_TRACE_CLIP_VP_STATE, + sizeof(*vp), 32, &brw->clip.vp_offset); vp->xmin = -1.0; vp->xmax = 1.0; @@ -72,7 +73,8 @@ prepare_sf_vp(struct brw_context *brw) const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); /* _NEW_BUFFERS */ diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 022e23e12b0..ecdf5e4f614 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -60,7 +60,7 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) float *param; int i; - param = brw_state_batch(brw, + param = brw_state_batch(brw, AUB_TRACE_NO_TYPE, (MAX_CLIP_PLANES + nr_params) * 4 * sizeof(float), 32, &brw->vs.push_const_offset); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 9ef6133e2b9..185da9c355f 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -54,7 +54,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, + constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index 95f6fbf7414..e787c21f4d1 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -183,7 +183,8 @@ gen7_prepare_samplers(struct brw_context *brw) if (brw->wm.sampler_count == 0) return; - samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->wm.sampler_offset); memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c index 838ad3a3948..e9aacd56317 100644 --- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c @@ -36,7 +36,8 @@ prepare_sf_clip_viewport(struct brw_context *brw) const GLfloat *v = ctx->Viewport._WindowMap.m; struct gen7_sf_clip_viewport *vp; - vp = brw_state_batch(brw, sizeof(vp), 64, &brw->sf.vp_offset); + vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(vp), 64, &brw->sf.vp_offset); /* Also assign to clip.vp_offset in case something uses it. */ brw->clip.vp_offset = brw->sf.vp_offset; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 0f5b06cbf48..a102ca772b3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -51,7 +51,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, + constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 9994b67bfc5..4add1a69f02 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -65,8 +65,8 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit) const GLuint surf_index = SURF_INDEX_TEXTURE(unit); struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[surf_index]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = translate_tex_target(tObj->Target); @@ -135,7 +135,8 @@ gen7_create_constant_surface(struct brw_context *brw, const GLint w = width - 1; struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, out_offset); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, out_offset); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_BUFFER; @@ -210,8 +211,8 @@ gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit) { struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_NULL; @@ -235,8 +236,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, struct gen7_surface_state *surf; uint32_t tile_x, tile_y; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); switch (irb->Base.Format) { From 65c6de000e016e9a9468d32688514a00255f0fa4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jun 2011 15:48:09 -0700 Subject: [PATCH 071/110] i965: Track the brw_state_batch() data while under INTEL_DEBUG=batch. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 8 +++++++ src/mesa/drivers/dri/i965/brw_state_batch.c | 26 +++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vtbl.c | 2 ++ 3 files changed, 36 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 012617b1d2f..1a918f47b26 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -768,6 +768,14 @@ struct brw_context int num_prepare_atoms, num_emit_atoms; struct brw_tracked_state prepare_atoms[64], emit_atoms[64]; + + /* If (INTEL_DEBUG & DEBUG_BATCH) */ + struct { + uint32_t offset; + uint32_t size; + enum state_struct_type type; + } *state_batch_list; + int state_batch_count; }; diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 32f315ef107..5a983c3d847 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -32,6 +32,29 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "main/imports.h" +#include "../glsl/ralloc.h" + +static void +brw_track_state_batch(struct brw_context *brw, + enum state_struct_type type, + uint32_t offset, + int size) +{ + struct intel_batchbuffer *batch = &brw->intel.batch; + + if (!brw->state_batch_list) { + /* Our structs are always aligned to at least 32 bytes, so + * our array doesn't need to be any larger + */ + brw->state_batch_list = ralloc_size(brw, sizeof(*brw->state_batch_list) * + batch->bo->size / 32); + } + + brw->state_batch_list[brw->state_batch_count].offset = offset; + brw->state_batch_list[brw->state_batch_count].size = size; + brw->state_batch_list[brw->state_batch_count].type = type; + brw->state_batch_count++; +} /** * Allocates a block of space in the batchbuffer for indirect state. @@ -72,6 +95,9 @@ brw_state_batch(struct brw_context *brw, batch->state_batch_offset = offset; + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) + brw_track_state_batch(brw, type, offset, size); + *out_offset = offset; return batch->map + (offset>>2); } diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 8612e743265..6aeeda6e0fa 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -123,6 +123,8 @@ static void brw_new_batch( struct intel_context *intel ) */ intel->batch.need_workaround_flush = true; + brw->state_batch_count = 0; + brw->vb.nr_current_buffers = 0; /* Mark that the current program cache BO has been used by the GPU. From 2a8d744345da718ed98b7ee216c4091018830900 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jun 2011 15:56:07 -0700 Subject: [PATCH 072/110] i965: Move CLIP VP state dump to using the state_batch_list[]. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 26 +++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 7a3a88f04f5..1b76a301b93 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -319,7 +319,8 @@ static void dump_sf_viewport_state(struct brw_context *brw) drm_intel_bo_unmap(intel->batch.bo); } -static void dump_clip_viewport_state(struct brw_context *brw) +static void dump_clip_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "CLIP VP"; @@ -330,8 +331,8 @@ static void dump_clip_viewport_state(struct brw_context *brw) drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = intel->batch.bo->virtual + brw->clip.vp_offset; - vp_off = intel->batch.bo->offset + brw->clip.vp_offset; + vp = intel->batch.bo->virtual + offset; + vp_off = intel->batch.bo->offset + offset; state_out(name, vp, vp_off, 0, "xmin = %f\n", vp->xmin); state_out(name, vp, vp_off, 1, "xmax = %f\n", vp->xmax); @@ -486,6 +487,21 @@ static void brw_debug_prog(struct brw_context *brw, drm_intel_bo_unmap(brw->cache.bo); } +static void +dump_state_batch(struct brw_context *brw) +{ + int i; + + for (i = 0; i < brw->state_batch_count; i++) { + switch (brw->state_batch_list[i].type) { + case AUB_TRACE_CLIP_VP_STATE: + dump_clip_viewport_state(brw, brw->state_batch_list[i].offset); + break; + default: + break; + } + } +} /** * Print additional debug information associated with the batchbuffer @@ -534,8 +550,6 @@ void brw_debug_batch(struct intel_context *intel) dump_sf_clip_viewport_state(brw); else dump_sf_viewport_state(brw); - if (intel->gen == 6) - dump_clip_viewport_state(brw); if (intel->gen < 6) state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, @@ -548,4 +562,6 @@ void brw_debug_batch(struct intel_context *intel) dump_cc_state(brw); dump_blend_state(brw); } + + dump_state_batch(brw); } From fbfeff73f3d6195e62ee413c18a93632edc3510d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jun 2011 15:58:50 -0700 Subject: [PATCH 073/110] i965: Move the SF VP state dump to using the state_batch_list[] Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 30 ++++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 1b76a301b93..2736ad5aa42 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -290,7 +290,8 @@ static void dump_gen7_sampler_state(struct brw_context *brw) } -static void dump_sf_viewport_state(struct brw_context *brw) +static void dump_sf_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "SF VP"; @@ -301,8 +302,8 @@ static void dump_sf_viewport_state(struct brw_context *brw) drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = intel->batch.bo->virtual + brw->sf.vp_offset; - vp_off = intel->batch.bo->offset + brw->sf.vp_offset; + vp = intel->batch.bo->virtual + offset; + vp_off = intel->batch.bo->offset + offset; state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); @@ -341,7 +342,8 @@ static void dump_clip_viewport_state(struct brw_context *brw, drm_intel_bo_unmap(intel->batch.bo); } -static void dump_sf_clip_viewport_state(struct brw_context *brw) +static void dump_sf_clip_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "SF_CLIP VP"; @@ -352,8 +354,8 @@ static void dump_sf_clip_viewport_state(struct brw_context *brw) drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = intel->batch.bo->virtual + brw->sf.vp_offset; - vp_off = intel->batch.bo->offset + brw->sf.vp_offset; + vp = intel->batch.bo->virtual + offset; + vp_off = intel->batch.bo->offset + offset; state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); @@ -490,12 +492,22 @@ static void brw_debug_prog(struct brw_context *brw, static void dump_state_batch(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; int i; for (i = 0; i < brw->state_batch_count; i++) { + uint32_t offset = brw->state_batch_list[i].offset; + switch (brw->state_batch_list[i].type) { case AUB_TRACE_CLIP_VP_STATE: - dump_clip_viewport_state(brw, brw->state_batch_list[i].offset); + dump_clip_viewport_state(brw, offset); + break; + case AUB_TRACE_SF_VP_STATE: + if (intel->gen >= 7) { + dump_sf_clip_viewport_state(brw, offset); + } else { + dump_sf_viewport_state(brw, offset); + } break; default: break; @@ -546,10 +558,6 @@ void brw_debug_batch(struct intel_context *intel) sizeof(struct brw_sf_unit_state)); brw_debug_prog(brw, "SF prog", brw->sf.prog_offset); } - if (intel->gen >= 7) - dump_sf_clip_viewport_state(brw); - else - dump_sf_viewport_state(brw); if (intel->gen < 6) state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, From 00f07b33eca37c48e9c487426e80cb85bceffff2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jun 2011 16:03:05 -0700 Subject: [PATCH 074/110] i965: Move the new gen6 state structs to using state_batch_list[]. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 46 +++++++++++----------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 2736ad5aa42..6dd13149638 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -371,7 +371,7 @@ static void dump_sf_clip_viewport_state(struct brw_context *brw, } -static void dump_cc_viewport_state(struct brw_context *brw) +static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "CC VP"; @@ -380,15 +380,15 @@ static void dump_cc_viewport_state(struct brw_context *brw) drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = intel->batch.bo->virtual + brw->cc.vp_offset; - vp_off = intel->batch.bo->offset + brw->cc.vp_offset; + vp = intel->batch.bo->virtual + offset; + vp_off = intel->batch.bo->offset + offset; state_out(name, vp, vp_off, 0, "min_depth = %f\n", vp->min_depth); state_out(name, vp, vp_off, 1, "max_depth = %f\n", vp->max_depth); drm_intel_bo_unmap(intel->batch.bo); } -static void dump_depth_stencil_state(struct brw_context *brw) +static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "DEPTH STENCIL"; @@ -397,8 +397,8 @@ static void dump_depth_stencil_state(struct brw_context *brw) drm_intel_bo_map(intel->batch.bo, GL_FALSE); - ds = intel->batch.bo->virtual + brw->cc.depth_stencil_state_offset; - ds_off = intel->batch.bo->offset + brw->cc.depth_stencil_state_offset; + ds = intel->batch.bo->virtual + offset; + ds_off = intel->batch.bo->offset + offset; state_out(name, ds, ds_off, 0, "stencil %sable, func %d, write %sable\n", ds->ds0.stencil_enable ? "en" : "dis", @@ -413,19 +413,16 @@ static void dump_depth_stencil_state(struct brw_context *brw) drm_intel_bo_unmap(intel->batch.bo); } -static void dump_cc_state(struct brw_context *brw) +static void dump_cc_state(struct brw_context *brw, uint32_t offset) { const char *name = "CC"; struct gen6_color_calc_state *cc; uint32_t cc_off; dri_bo *bo = brw->intel.batch.bo; - if (brw->cc.state_offset == 0) - return; - drm_intel_bo_map(bo, GL_FALSE); - cc = bo->virtual + brw->cc.state_offset; - cc_off = bo->offset + brw->cc.state_offset; + cc = bo->virtual + offset; + cc_off = bo->offset + offset; state_out(name, cc, cc_off, 0, "alpha test format %s, round disable %d, stencil ref %d," "bf stencil ref %d\n", @@ -443,7 +440,7 @@ static void dump_cc_state(struct brw_context *brw) } -static void dump_blend_state(struct brw_context *brw) +static void dump_blend_state(struct brw_context *brw, uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "BLEND"; @@ -452,8 +449,8 @@ static void dump_blend_state(struct brw_context *brw) drm_intel_bo_map(intel->batch.bo, GL_FALSE); - blend = intel->batch.bo->virtual + brw->cc.blend_state_offset; - blend_off = intel->batch.bo->offset + brw->cc.blend_state_offset; + blend = intel->batch.bo->virtual + offset; + blend_off = intel->batch.bo->offset + offset; state_out(name, blend, blend_off, 0, "\n"); state_out(name, blend, blend_off, 1, "\n"); @@ -509,6 +506,18 @@ dump_state_batch(struct brw_context *brw) dump_sf_viewport_state(brw, offset); } break; + case AUB_TRACE_CC_VP_STATE: + dump_cc_viewport_state(brw, offset); + break; + case AUB_TRACE_DEPTH_STENCIL_STATE: + dump_depth_stencil_state(brw, offset); + break; + case AUB_TRACE_CC_STATE: + dump_cc_state(brw, offset); + break; + case AUB_TRACE_BLEND_STATE: + dump_blend_state(brw, offset); + break; default: break; } @@ -564,12 +573,5 @@ void brw_debug_batch(struct intel_context *intel) sizeof(struct brw_wm_unit_state)); brw_debug_prog(brw, "WM prog", brw->wm.prog_offset); - if (intel->gen >= 6) { - dump_cc_viewport_state(brw); - dump_depth_stencil_state(brw); - dump_cc_state(brw); - dump_blend_state(brw); - } - dump_state_batch(brw); } From d4846674232112bc3c7ad583e0daec0dfa2a3d26 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jun 2011 16:12:29 -0700 Subject: [PATCH 075/110] i965: Map the batch once for dumping all our state batch structs. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 27 ++-------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 6dd13149638..2356490e418 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -300,8 +300,6 @@ static void dump_sf_viewport_state(struct brw_context *brw, assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = intel->batch.bo->virtual + offset; vp_off = intel->batch.bo->offset + offset; @@ -316,8 +314,6 @@ static void dump_sf_viewport_state(struct brw_context *brw, vp->scissor.xmin, vp->scissor.ymin); state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", vp->scissor.xmax, vp->scissor.ymax); - - drm_intel_bo_unmap(intel->batch.bo); } static void dump_clip_viewport_state(struct brw_context *brw, @@ -330,8 +326,6 @@ static void dump_clip_viewport_state(struct brw_context *brw, assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = intel->batch.bo->virtual + offset; vp_off = intel->batch.bo->offset + offset; @@ -339,7 +333,6 @@ static void dump_clip_viewport_state(struct brw_context *brw, state_out(name, vp, vp_off, 1, "xmax = %f\n", vp->xmax); state_out(name, vp, vp_off, 2, "ymin = %f\n", vp->ymin); state_out(name, vp, vp_off, 3, "ymax = %f\n", vp->ymax); - drm_intel_bo_unmap(intel->batch.bo); } static void dump_sf_clip_viewport_state(struct brw_context *brw, @@ -352,8 +345,6 @@ static void dump_sf_clip_viewport_state(struct brw_context *brw, assert(intel->gen >= 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = intel->batch.bo->virtual + offset; vp_off = intel->batch.bo->offset + offset; @@ -367,7 +358,6 @@ static void dump_sf_clip_viewport_state(struct brw_context *brw, state_out(name, vp, vp_off, 7, "guardband xmax = %f\n", vp->guardband.xmax); state_out(name, vp, vp_off, 8, "guardband ymin = %f\n", vp->guardband.ymin); state_out(name, vp, vp_off, 9, "guardband ymax = %f\n", vp->guardband.ymax); - drm_intel_bo_unmap(intel->batch.bo); } @@ -378,14 +368,11 @@ static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset) struct brw_cc_viewport *vp; uint32_t vp_off; - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - vp = intel->batch.bo->virtual + offset; vp_off = intel->batch.bo->offset + offset; state_out(name, vp, vp_off, 0, "min_depth = %f\n", vp->min_depth); state_out(name, vp, vp_off, 1, "max_depth = %f\n", vp->max_depth); - drm_intel_bo_unmap(intel->batch.bo); } static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) @@ -395,8 +382,6 @@ static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) struct gen6_depth_stencil_state *ds; uint32_t ds_off; - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - ds = intel->batch.bo->virtual + offset; ds_off = intel->batch.bo->offset + offset; @@ -410,7 +395,6 @@ static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) ds->ds2.depth_test_enable ? "en" : "dis", ds->ds2.depth_test_func, ds->ds2.depth_write_enable ? "en" : "dis"); - drm_intel_bo_unmap(intel->batch.bo); } static void dump_cc_state(struct brw_context *brw, uint32_t offset) @@ -420,7 +404,6 @@ static void dump_cc_state(struct brw_context *brw, uint32_t offset) uint32_t cc_off; dri_bo *bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); cc = bo->virtual + offset; cc_off = bo->offset + offset; @@ -435,9 +418,6 @@ static void dump_cc_state(struct brw_context *brw, uint32_t offset) state_out(name, cc, cc_off, 3, "constant green %f\n", cc->constant_g); state_out(name, cc, cc_off, 4, "constant blue %f\n", cc->constant_b); state_out(name, cc, cc_off, 5, "constant alpha %f\n", cc->constant_a); - - drm_intel_bo_unmap(bo); - } static void dump_blend_state(struct brw_context *brw, uint32_t offset) @@ -447,16 +427,11 @@ static void dump_blend_state(struct brw_context *brw, uint32_t offset) struct gen6_blend_state *blend; uint32_t blend_off; - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - blend = intel->batch.bo->virtual + offset; blend_off = intel->batch.bo->offset + offset; state_out(name, blend, blend_off, 0, "\n"); state_out(name, blend, blend_off, 1, "\n"); - - drm_intel_bo_unmap(intel->batch.bo); - } static void brw_debug_prog(struct brw_context *brw, @@ -573,5 +548,7 @@ void brw_debug_batch(struct intel_context *intel) sizeof(struct brw_wm_unit_state)); brw_debug_prog(brw, "WM prog", brw->wm.prog_offset); + drm_intel_bo_map(intel->batch.bo, false); dump_state_batch(brw); + drm_intel_bo_unmap(intel->batch.bo); } From 709a7af2afd72283f3532e5574fbfdef91f70e28 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jun 2011 16:10:56 -0700 Subject: [PATCH 076/110] i965: Dump brw_state_batch data in an easier format for cross-referencing. Now that we're using state base addresses for most things, we're less interested in the absolute address of the state, and more in its offset from the state base address (start of batchbuffer). Also, reorder the printout so it looks more like the batchbuffer dump. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 164 ++++++++++----------- 1 file changed, 77 insertions(+), 87 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 2356490e418..77dabdff993 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -31,6 +31,10 @@ #include "brw_context.h" #include "brw_defines.h" +static void +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) PRINTFLIKE(5, 6); + /** * Prints out a header, the contents, and the message associated with * the hardware state data given. @@ -53,6 +57,21 @@ state_out(const char *name, void *data, uint32_t hw_offset, int index, va_end(va); } +static void +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) +{ + struct intel_context *intel = &brw->intel; + uint32_t *data = intel->batch.bo->virtual + offset; + va_list va; + + fprintf(stderr, "0x%08x: 0x%08x: %8s: ", + offset + index * 4, data[index], name); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); +} + /** Generic, undecoded state buffer debug printout */ static void state_struct_out(const char *name, drm_intel_bo *buffer, @@ -295,24 +314,20 @@ static void dump_sf_viewport_state(struct brw_context *brw, { struct intel_context *intel = &brw->intel; const char *name = "SF VP"; - struct brw_sf_viewport *vp; - uint32_t vp_off; + struct brw_sf_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - vp = intel->batch.bo->virtual + offset; - vp_off = intel->batch.bo->offset + offset; + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); - - state_out(name, vp, vp_off, 6, "top left = %d,%d\n", + batch_out(brw, name, offset, 6, "top left = %d,%d\n", vp->scissor.xmin, vp->scissor.ymin); - state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", + batch_out(brw, name, offset, 7, "bottom right = %d,%d\n", vp->scissor.xmax, vp->scissor.ymax); } @@ -321,18 +336,14 @@ static void dump_clip_viewport_state(struct brw_context *brw, { struct intel_context *intel = &brw->intel; const char *name = "CLIP VP"; - struct brw_clipper_viewport *vp; - uint32_t vp_off; + struct brw_clipper_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - vp = intel->batch.bo->virtual + offset; - vp_off = intel->batch.bo->offset + offset; - - state_out(name, vp, vp_off, 0, "xmin = %f\n", vp->xmin); - state_out(name, vp, vp_off, 1, "xmax = %f\n", vp->xmax); - state_out(name, vp, vp_off, 2, "ymin = %f\n", vp->ymin); - state_out(name, vp, vp_off, 3, "ymax = %f\n", vp->ymax); + batch_out(brw, name, offset, 0, "xmin = %f\n", vp->xmin); + batch_out(brw, name, offset, 1, "xmax = %f\n", vp->xmax); + batch_out(brw, name, offset, 2, "ymin = %f\n", vp->ymin); + batch_out(brw, name, offset, 3, "ymax = %f\n", vp->ymax); } static void dump_sf_clip_viewport_state(struct brw_context *brw, @@ -340,98 +351,77 @@ static void dump_sf_clip_viewport_state(struct brw_context *brw, { struct intel_context *intel = &brw->intel; const char *name = "SF_CLIP VP"; - struct gen7_sf_clip_viewport *vp; - uint32_t vp_off; + struct gen7_sf_clip_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen >= 7); - vp = intel->batch.bo->virtual + offset; - vp_off = intel->batch.bo->offset + offset; - - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); - state_out(name, vp, vp_off, 6, "guardband xmin = %f\n", vp->guardband.xmin); - state_out(name, vp, vp_off, 7, "guardband xmax = %f\n", vp->guardband.xmax); - state_out(name, vp, vp_off, 8, "guardband ymin = %f\n", vp->guardband.ymin); - state_out(name, vp, vp_off, 9, "guardband ymax = %f\n", vp->guardband.ymax); + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); + batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin); + batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax); + batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin); + batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax); } static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; const char *name = "CC VP"; - struct brw_cc_viewport *vp; - uint32_t vp_off; + struct brw_cc_viewport *vp = brw->intel.batch.bo->virtual + offset; - vp = intel->batch.bo->virtual + offset; - vp_off = intel->batch.bo->offset + offset; - - state_out(name, vp, vp_off, 0, "min_depth = %f\n", vp->min_depth); - state_out(name, vp, vp_off, 1, "max_depth = %f\n", vp->max_depth); + batch_out(brw, name, offset, 0, "min_depth = %f\n", vp->min_depth); + batch_out(brw, name, offset, 1, "max_depth = %f\n", vp->max_depth); } static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; - const char *name = "DEPTH STENCIL"; - struct gen6_depth_stencil_state *ds; - uint32_t ds_off; + const char *name = "D_S"; + struct gen6_depth_stencil_state *ds = brw->intel.batch.bo->virtual + offset; - ds = intel->batch.bo->virtual + offset; - ds_off = intel->batch.bo->offset + offset; - - state_out(name, ds, ds_off, 0, "stencil %sable, func %d, write %sable\n", - ds->ds0.stencil_enable ? "en" : "dis", - ds->ds0.stencil_func, - ds->ds0.stencil_write_enable ? "en" : "dis"); - state_out(name, ds, ds_off, 1, "stencil test mask 0x%x, write mask 0x%x\n", - ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); - state_out(name, ds, ds_off, 2, "depth test %sable, func %d, write %sable\n", - ds->ds2.depth_test_enable ? "en" : "dis", - ds->ds2.depth_test_func, - ds->ds2.depth_write_enable ? "en" : "dis"); + batch_out(brw, name, offset, 0, + "stencil %sable, func %d, write %sable\n", + ds->ds0.stencil_enable ? "en" : "dis", + ds->ds0.stencil_func, + ds->ds0.stencil_write_enable ? "en" : "dis"); + batch_out(brw, name, offset, 1, + "stencil test mask 0x%x, write mask 0x%x\n", + ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); + batch_out(brw, name, offset, 2, + "depth test %sable, func %d, write %sable\n", + ds->ds2.depth_test_enable ? "en" : "dis", + ds->ds2.depth_test_func, + ds->ds2.depth_write_enable ? "en" : "dis"); } static void dump_cc_state(struct brw_context *brw, uint32_t offset) { const char *name = "CC"; - struct gen6_color_calc_state *cc; - uint32_t cc_off; - dri_bo *bo = brw->intel.batch.bo; + struct gen6_color_calc_state *cc = brw->intel.batch.bo->virtual + offset; - cc = bo->virtual + offset; - cc_off = bo->offset + offset; - - state_out(name, cc, cc_off, 0, "alpha test format %s, round disable %d, stencil ref %d," - "bf stencil ref %d\n", - cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8", - cc->cc0.round_disable, - cc->cc0.stencil_ref, - cc->cc0.bf_stencil_ref); - state_out(name, cc, cc_off, 1, "\n"); - state_out(name, cc, cc_off, 2, "constant red %f\n", cc->constant_r); - state_out(name, cc, cc_off, 3, "constant green %f\n", cc->constant_g); - state_out(name, cc, cc_off, 4, "constant blue %f\n", cc->constant_b); - state_out(name, cc, cc_off, 5, "constant alpha %f\n", cc->constant_a); + batch_out(brw, name, offset, 0, + "alpha test format %s, round disable %d, stencil ref %d, " + "bf stencil ref %d\n", + cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8", + cc->cc0.round_disable, + cc->cc0.stencil_ref, + cc->cc0.bf_stencil_ref); + batch_out(brw, name, offset, 1, "\n"); + batch_out(brw, name, offset, 2, "constant red %f\n", cc->constant_r); + batch_out(brw, name, offset, 3, "constant green %f\n", cc->constant_g); + batch_out(brw, name, offset, 4, "constant blue %f\n", cc->constant_b); + batch_out(brw, name, offset, 5, "constant alpha %f\n", cc->constant_a); } static void dump_blend_state(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; const char *name = "BLEND"; - struct gen6_blend_state *blend; - uint32_t blend_off; - blend = intel->batch.bo->virtual + offset; - blend_off = intel->batch.bo->offset + offset; - - state_out(name, blend, blend_off, 0, "\n"); - state_out(name, blend, blend_off, 1, "\n"); + batch_out(brw, name, offset, 0, "\n"); + batch_out(brw, name, offset, 1, "\n"); } static void brw_debug_prog(struct brw_context *brw, From 13e82ece6d4761f07fb01e5a9aa57448d3bd2345 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jun 2011 16:53:06 -0700 Subject: [PATCH 077/110] i965: Dump the surface, sampler, and sdc state using state_batch_list[]. Now, for example, INTEL_DEBUG=batch tex-border-1 shows all the texturing state involved. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 267 ++++++++------------- 1 file changed, 104 insertions(+), 163 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 77dabdff993..a7ead641605 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -117,193 +117,123 @@ get_965_surface_format(unsigned int surface_format) } } -static void dump_wm_surface_state(struct brw_context *brw) +static void dump_surface_state(struct brw_context *brw, uint32_t offset) { - dri_bo *bo; - GLubyte *base; - int i; + const char *name = "SURF"; + uint32_t *surf = brw->intel.batch.bo->virtual + offset; - bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); - base = bo->virtual; - - for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff; - uint32_t *surf; - char name[20]; - - if (brw->wm.surf_offset[i] == 0) { - fprintf(stderr, "WM SURF%d: NULL\n", i); - continue; - } - surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (uint32_t *)(base + brw->wm.surf_offset[i]); - - sprintf(name, "WM SURF%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), - get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, - GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1); - state_out(name, surf, surfoff, 3, "pitch %d, %s tiled\n", - GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, - (surf[3] & BRW_SURFACE_TILED) ? - ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); - state_out(name, surf, surfoff, 4, "mip base %d\n", - GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), - GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); - } - drm_intel_bo_unmap(bo); + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, + GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1, + GET_FIELD(surf[2], BRW_SURFACE_LOD)); + batch_out(brw, name, offset, 3, "pitch %d, %s tiled\n", + GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, + (surf[3] & BRW_SURFACE_TILED) ? + ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); + batch_out(brw, name, offset, 4, "mip base %d\n", + GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), + GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); } -static void dump_gen7_surface_state(struct brw_context *brw) +static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset) { - dri_bo *bo; - GLubyte *base; - int i; + const char *name = "SURF"; + struct gen7_surface_state *surf = brw->intel.batch.bo->virtual + offset; - bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); - base = bo->virtual; - - for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff; - struct gen7_surface_state *surf; - char name[20]; - - if (brw->wm.surf_offset[i] == 0) { - fprintf(stderr, "WM SURF%d: NULL\n", i); - continue; - } - surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (struct gen7_surface_state *) (base + brw->wm.surf_offset[i]); - - sprintf(name, "WM SURF%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(surf->ss0.surface_type), - get_965_surface_format(surf->ss0.surface_format)); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - surf->ss2.width + 1, surf->ss2.height + 1, surf->ss5.mip_count); - state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n", - surf->ss3.pitch + 1, surf->ss0.tiled_surface ? "" : "not "); - state_out(name, surf, surfoff, 4, "mip base %d\n", - surf->ss5.min_lod); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - surf->ss5.x_offset, surf->ss5.y_offset); - } - drm_intel_bo_unmap(bo); + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + surf->ss2.width + 1, surf->ss2.height + 1, surf->ss5.mip_count); + batch_out(brw, name, offset, 3, "pitch %d, %stiled\n", + surf->ss3.pitch + 1, surf->ss0.tiled_surface ? "" : "not "); + batch_out(brw, name, offset, 4, "mip base %d\n", + surf->ss5.min_lod); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + surf->ss5.x_offset, surf->ss5.y_offset); } -static void dump_wm_sampler_state(struct brw_context *brw) +static void +dump_sdc(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SDC"; + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 5 && intel->gen <= 6) { + struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "unorm rgba\n"); + batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]); + batch_out(brw, name, offset, 3, "g %f\n", sdc->f[2]); + batch_out(brw, name, offset, 4, "a %f\n", sdc->f[3]); + batch_out(brw, name, offset, 5, "half float rg\n"); + batch_out(brw, name, offset, 6, "half float ba\n"); + batch_out(brw, name, offset, 7, "u16 rg\n"); + batch_out(brw, name, offset, 8, "u16 ba\n"); + batch_out(brw, name, offset, 9, "s16 rg\n"); + batch_out(brw, name, offset, 10, "s16 ba\n"); + batch_out(brw, name, offset, 11, "s8 rgba\n"); + } else { + struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "r %f\n", sdc->color[0]); + batch_out(brw, name, offset, 1, "g %f\n", sdc->color[1]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->color[2]); + batch_out(brw, name, offset, 3, "a %f\n", sdc->color[3]); + } +} + +static void dump_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; int i; + struct brw_sampler_state *samp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - unsigned int offset; - uint32_t sdc_offset; - struct brw_sampler_state *samp; + for (i = 0; i < size / sizeof(*samp); i++) { char name[20]; - if (!ctx->Texture.Unit[i]._ReallyEnabled) { - fprintf(stderr, "WM SAMP%d: disabled\n", i); - continue; - } - - offset = (intel->batch.bo->offset + - brw->wm.sampler_offset + - i * sizeof(struct brw_sampler_state)); - samp = (struct brw_sampler_state *)(intel->batch.bo->virtual + - brw->wm.sampler_offset + - i * sizeof(struct brw_sampler_state)); - sprintf(name, "WM SAMP%d", i); - state_out(name, samp, offset, 0, "filtering\n"); - state_out(name, samp, offset, 1, "wrapping, lod\n"); - state_out(name, samp, offset, 2, "default color pointer\n"); - state_out(name, samp, offset, 3, "chroma key, aniso\n"); + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); - sprintf(name, " WM SDC%d", i); - - sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; - if (intel->gen >= 5) { - struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + - brw->wm.sdc_offset[i]); - state_out(name, sdc, sdc_offset, 0, "unorm rgba\n"); - state_out(name, sdc, sdc_offset, 1, "r %f\n", sdc->f[0]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->f[1]); - state_out(name, sdc, sdc_offset, 3, "g %f\n", sdc->f[2]); - state_out(name, sdc, sdc_offset, 4, "a %f\n", sdc->f[3]); - state_out(name, sdc, sdc_offset, 5, "half float rg\n"); - state_out(name, sdc, sdc_offset, 6, "half float ba\n"); - state_out(name, sdc, sdc_offset, 7, "u16 rg\n"); - state_out(name, sdc, sdc_offset, 8, "u16 ba\n"); - state_out(name, sdc, sdc_offset, 9, "s16 rg\n"); - state_out(name, sdc, sdc_offset, 10, "s16 ba\n"); - state_out(name, sdc, sdc_offset, 11, "s8 rgba\n"); - } else { - struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + - brw->wm.sdc_offset[i]); - state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); - state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); - state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); - } + samp++; + offset += sizeof(*samp); } - drm_intel_bo_unmap(intel->batch.bo); } -static void dump_gen7_sampler_state(struct brw_context *brw) +static void dump_gen7_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; + struct gen7_sampler_state *samp = intel->batch.bo->virtual + offset; int i; assert(intel->gen >= 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - unsigned int offset; - uint32_t sdc_offset; - struct gen7_sampler_state *samp; + for (i = 0; i < size / sizeof(*samp); i++) { char name[20]; - if (!ctx->Texture.Unit[i]._ReallyEnabled) { - fprintf(stderr, "WM SAMP%d: disabled\n", i); - continue; - } - - offset = (intel->batch.bo->offset + - brw->wm.sampler_offset + - i * sizeof(struct gen7_sampler_state)); - samp = (struct gen7_sampler_state *) - (intel->batch.bo->virtual + brw->wm.sampler_offset + - i * sizeof(struct gen7_sampler_state)); - sprintf(name, "WM SAMP%d", i); - state_out(name, samp, offset, 0, "filtering\n"); - state_out(name, samp, offset, 1, "wrapping, lod\n"); - state_out(name, samp, offset, 2, "default color pointer\n"); - state_out(name, samp, offset, 3, "chroma key, aniso\n"); + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); - sprintf(name, " WM SDC%d", i); - - sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; - struct brw_sampler_default_color *sdc = - intel->batch.bo->virtual + brw->wm.sdc_offset[i]; - state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); - state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); - state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); + samp++; + offset += sizeof(*samp); } drm_intel_bo_unmap(intel->batch.bo); } @@ -459,6 +389,7 @@ dump_state_batch(struct brw_context *brw) for (i = 0; i < brw->state_batch_count; i++) { uint32_t offset = brw->state_batch_list[i].offset; + uint32_t size = brw->state_batch_list[i].size; switch (brw->state_batch_list[i].type) { case AUB_TRACE_CLIP_VP_STATE: @@ -483,6 +414,23 @@ dump_state_batch(struct brw_context *brw) case AUB_TRACE_BLEND_STATE: dump_blend_state(brw, offset); break; + case AUB_TRACE_SURFACE_STATE: + if (intel->gen < 7) { + dump_surface_state(brw, offset); + } else { + dump_gen7_surface_state(brw, offset); + } + break; + case AUB_TRACE_SAMPLER_STATE: + if (intel->gen < 7) { + dump_sampler_state(brw, offset, size); + } else { + dump_gen7_sampler_state(brw, offset, size); + } + break; + case AUB_TRACE_SAMPLER_DEFAULT_COLOR: + dump_sdc(brw, offset); + break; default: break; } @@ -507,13 +455,6 @@ void brw_debug_batch(struct intel_context *intel) brw->intel.batch.bo, brw->wm.bind_bo_offset, 4 * brw->wm.nr_surfaces); - if (intel->gen < 7) { - dump_wm_surface_state(brw); - dump_wm_sampler_state(brw); - } else { - dump_gen7_surface_state(brw); - dump_gen7_sampler_state(brw); - } if (intel->gen < 6) state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, From 6bbaa7c0e58cd477deb970cd52d281bb9f87486c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jun 2011 17:01:03 -0700 Subject: [PATCH 078/110] i965: Dump the binding table using state_batch_list[]. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 24 +++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index a7ead641605..3750ec6ec34 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -354,6 +354,22 @@ static void dump_blend_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 1, "\n"); } +static void dump_binding_table(struct brw_context *brw, uint32_t offset, + uint32_t size) +{ + char name[20]; + int i; + uint32_t *data = brw->intel.batch.bo->virtual + offset; + + for (i = 0; i < size / 4; i++) { + if (data[i] == 0) + continue; + + sprintf(name, "BIND%d", i); + batch_out(brw, name, offset, i, "surface state address\n"); + } +} + static void brw_debug_prog(struct brw_context *brw, const char *name, uint32_t prog_offset) { @@ -414,6 +430,9 @@ dump_state_batch(struct brw_context *brw) case AUB_TRACE_BLEND_STATE: dump_blend_state(brw, offset); break; + case AUB_TRACE_BINDING_TABLE: + dump_binding_table(brw, offset, size); + break; case AUB_TRACE_SURFACE_STATE: if (intel->gen < 7) { dump_surface_state(brw, offset); @@ -451,11 +470,6 @@ void brw_debug_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - state_struct_out("WM bind", - brw->intel.batch.bo, - brw->wm.bind_bo_offset, - 4 * brw->wm.nr_surfaces); - if (intel->gen < 6) state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, sizeof(struct brw_vs_unit_state)); From 93c7a5da1121a4aebff2fc889af229cbfb7006f5 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jun 2011 17:11:00 -0700 Subject: [PATCH 079/110] i965: When dumping programs, dump the whole cache. But don't by default. The previous brw_state_dump output was rather useless -- last used program per batch, and just the hex. Now we dump all programs (since we don't know which were used), and disassemble them. But that's a ton of spam, and usually when looking into program contents we use INTEL_DEBUG={vs,wm,misc,other} and when looking into state updates we use INTEL_DEBUG=batch, so this dump usually just massively clutters up the output. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 68 +++++++++++++++------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 3750ec6ec34..391f73dea64 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -370,28 +370,55 @@ static void dump_binding_table(struct brw_context *brw, uint32_t offset, } } -static void brw_debug_prog(struct brw_context *brw, - const char *name, uint32_t prog_offset) +static void +dump_prog_cache(struct brw_context *brw) { - unsigned int i; + struct intel_context *intel = &brw->intel; + struct brw_cache *cache = &brw->cache; + unsigned int b, i; uint32_t *data; drm_intel_bo_map(brw->cache.bo, false); - data = brw->cache.bo->virtual + prog_offset; + for (b = 0; b < cache->size; b++) { + struct brw_cache_item *item; - for (i = 0; i < brw->cache.bo->size / 4 / 4; i++) { - fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", - name, (unsigned int)brw->cache.bo->offset + i * 4 * 4, - data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); - /* Stop at the end of the program. It'd be nice to keep track of the actual - * intended program size instead of guessing like this. - */ - if (data[i * 4 + 0] == 0 && - data[i * 4 + 1] == 0 && - data[i * 4 + 2] == 0 && - data[i * 4 + 3] == 0) - break; + for (item = cache->items[b]; item; item = item->next) { + const char *name; + uint32_t offset = item->offset; + + data = brw->cache.bo->virtual + item->offset; + + switch (item->cache_id) { + case BRW_VS_PROG: + name = "VS kernel"; + break; + case BRW_GS_PROG: + name = "GS kernel"; + break; + case BRW_CLIP_PROG: + name = "CLIP kernel"; + break; + case BRW_SF_PROG: + name = "SF kernel"; + break; + case BRW_WM_PROG: + name = "WM kernel"; + break; + default: + name = "unknown"; + break; + } + + for (i = 0; i < item->size / 4 / 4; i++) { + fprintf(stderr, "0x%08x: %8s: 0x%08x 0x%08x 0x%08x 0x%08x ", + offset + i * 4 * 4, + name, + data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + + brw_disasm(stderr, (void *)(data + i * 4), intel->gen); + } + } } drm_intel_bo_unmap(brw->cache.bo); @@ -473,27 +500,24 @@ void brw_debug_batch(struct intel_context *intel) if (intel->gen < 6) state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, sizeof(struct brw_vs_unit_state)); - brw_debug_prog(brw, "VS prog", brw->vs.prog_offset); if (intel->gen < 6) state_struct_out("GS", intel->batch.bo, brw->gs.state_offset, sizeof(struct brw_gs_unit_state)); - if (brw->gs.prog_active) { - brw_debug_prog(brw, "GS prog", brw->gs.prog_offset); - } if (intel->gen < 6) { state_struct_out("SF", intel->batch.bo, brw->sf.state_offset, sizeof(struct brw_sf_unit_state)); - brw_debug_prog(brw, "SF prog", brw->sf.prog_offset); } if (intel->gen < 6) state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, sizeof(struct brw_wm_unit_state)); - brw_debug_prog(brw, "WM prog", brw->wm.prog_offset); drm_intel_bo_map(intel->batch.bo, false); dump_state_batch(brw); drm_intel_bo_unmap(intel->batch.bo); + + if (0) + dump_prog_cache(brw); } From 6e17a01e427bb621f606b4237fae58c446de9a70 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 8 Jul 2011 13:02:28 -0700 Subject: [PATCH 080/110] i965/gen6: Add state dumping for the scissor packet. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 391f73dea64..e78e48f9ee9 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -354,6 +354,19 @@ static void dump_blend_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 1, "\n"); } +static void +dump_scissor(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SCISSOR"; + struct intel_context *intel = &brw->intel; + struct gen6_scissor_rect *scissor = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "xmin %d, ymin %d\n", + scissor->xmin, scissor->ymin); + batch_out(brw, name, offset, 1, "xmax %d, ymax %d\n", + scissor->xmax, scissor->ymax); +} + static void dump_binding_table(struct brw_context *brw, uint32_t offset, uint32_t size) { @@ -477,6 +490,9 @@ dump_state_batch(struct brw_context *brw) case AUB_TRACE_SAMPLER_DEFAULT_COLOR: dump_sdc(brw, offset); break; + case AUB_TRACE_SCISSOR_STATE: + dump_scissor(brw, offset); + break; default: break; } From a1226bcd20a582146f1848cc0af8145c53b0ecbc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 8 Jul 2011 13:35:42 -0700 Subject: [PATCH 081/110] i965/gen6: Add state dumping for the VS constants. This is quite a bit of spam, but I think it's useful to have in a full INTEL_DEBUG=batch dump. And a lot of this spam on glxgears is just because we're awful at handling our constants :/ Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_state_dump.c | 20 ++++++++++++++++++++ src/mesa/drivers/dri/i965/gen6_vs_state.c | 2 +- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1a918f47b26..471015cf9d0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -211,6 +211,7 @@ enum state_struct_type { AUB_TRACE_NO_TYPE = 0x100, AUB_TRACE_BINDING_TABLE = 0x101, AUB_TRACE_SURFACE_STATE = 0x102, + AUB_TRACE_VS_CONSTANTS = 0x103, }; /** Subclass of Mesa vertex program */ diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index e78e48f9ee9..0758cb51a6a 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -367,6 +367,23 @@ dump_scissor(struct brw_context *brw, uint32_t offset) scissor->xmax, scissor->ymax); } +static void +dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "VS_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; + + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} + static void dump_binding_table(struct brw_context *brw, uint32_t offset, uint32_t size) { @@ -493,6 +510,9 @@ dump_state_batch(struct brw_context *brw) case AUB_TRACE_SCISSOR_STATE: dump_scissor(brw, offset); break; + case AUB_TRACE_VS_CONSTANTS: + dump_vs_constants(brw, offset, size); + break; default: break; } diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index ecdf5e4f614..fb4cdbaadf9 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -60,7 +60,7 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) float *param; int i; - param = brw_state_batch(brw, AUB_TRACE_NO_TYPE, + param = brw_state_batch(brw, AUB_TRACE_VS_CONSTANTS, (MAX_CLIP_PLANES + nr_params) * 4 * sizeof(float), 32, &brw->vs.push_const_offset); From f7f03fb588414928ef75cb856aa683737dad6213 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 8 Jul 2011 14:09:47 -0700 Subject: [PATCH 082/110] i965/gen4: Add state dumping for unit state using state_batch_list[]. This is just barely more pretty-printing than we previously had, but at least it doesn't leave out unit states in the log. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 169 ++++++++++++++------- 1 file changed, 112 insertions(+), 57 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 0758cb51a6a..cc2c36c7e99 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -35,28 +35,6 @@ static void batch_out(struct brw_context *brw, const char *name, uint32_t offset, int index, char *fmt, ...) PRINTFLIKE(5, 6); -/** - * Prints out a header, the contents, and the message associated with - * the hardware state data given. - * - * \param name Name of the state object - * \param data Pointer to the base of the state object - * \param hw_offset Hardware offset of the base of the state data. - * \param index Index of the DWORD being output. - */ -static void -state_out(const char *name, void *data, uint32_t hw_offset, int index, - char *fmt, ...) -{ - va_list va; - - fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", - name, hw_offset + index * 4, ((uint32_t *)data)[index]); - va_start(va, fmt); - vfprintf(stderr, fmt, va); - va_end(va); -} - static void batch_out(struct brw_context *brw, const char *name, uint32_t offset, int index, char *fmt, ...) @@ -72,24 +50,6 @@ batch_out(struct brw_context *brw, const char *name, uint32_t offset, va_end(va); } -/** Generic, undecoded state buffer debug printout */ -static void -state_struct_out(const char *name, drm_intel_bo *buffer, - unsigned int offset, unsigned int size) -{ - int i; - - if (buffer == NULL) - return; - - drm_intel_bo_map(buffer, GL_FALSE); - for (i = 0; i < size / 4; i++) { - state_out(name, buffer->virtual + offset, buffer->offset + offset, i, - "dword %d\n", i); - } - drm_intel_bo_unmap(buffer); -} - static const char * get_965_surfacetype(unsigned int surfacetype) { @@ -117,6 +77,103 @@ get_965_surface_format(unsigned int surface_format) } } +static void dump_vs_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "VS_STATE"; + struct brw_vs_unit_state *vs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + vs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} + +static void dump_gs_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "GS_STATE"; + struct brw_gs_unit_state *gs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + gs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} + +static void dump_clip_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "CLIP_STATE"; + struct brw_clip_unit_state *clip = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + clip->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "clip5\n"); + batch_out(brw, name, offset, 6, "clip6\n"); + batch_out(brw, name, offset, 7, "vp xmin %f\n", clip->viewport_xmin); + batch_out(brw, name, offset, 8, "vp xmax %f\n", clip->viewport_xmax); + batch_out(brw, name, offset, 9, "vp ymin %f\n", clip->viewport_ymin); + batch_out(brw, name, offset, 10, "vp ymax %f\n", clip->viewport_ymax); +} + +static void dump_sf_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "SF_STATE"; + struct brw_sf_unit_state *sf = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + sf->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "sf5\n"); + batch_out(brw, name, offset, 6, "sf6\n"); + batch_out(brw, name, offset, 7, "sf7\n"); +} + +static void dump_wm_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "WM_STATE"; + struct brw_wm_unit_state *wm = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "wm4\n"); + batch_out(brw, name, offset, 5, "wm5: %s%s%s%s%s%s, %d threads\n", + wm->wm5.enable_8_pix ? "8pix" : "", + wm->wm5.enable_16_pix ? "16pix" : "", + wm->wm5.program_uses_depth ? ", uses depth" : "", + wm->wm5.program_computes_depth ? ", computes depth" : "", + wm->wm5.program_uses_killpixel ? ", kills" : "", + wm->wm5.thread_dispatch_enable ? "" : ", no dispatch", + wm->wm5.max_threads + 1); + batch_out(brw, name, offset, 6, "depth offset constant %f\n", + wm->global_depth_offset_constant); + batch_out(brw, name, offset, 7, "depth offset scale %f\n", + wm->global_depth_offset_scale); + batch_out(brw, name, offset, 8, "wm8: kernel 1 (gen5+)\n"); + batch_out(brw, name, offset, 9, "wm9: kernel 2 (gen5+)\n"); + batch_out(brw, name, offset, 10, "wm10: kernel 3 (gen5+)\n"); +} + static void dump_surface_state(struct brw_context *brw, uint32_t offset) { const char *name = "SURF"; @@ -465,6 +522,21 @@ dump_state_batch(struct brw_context *brw) uint32_t size = brw->state_batch_list[i].size; switch (brw->state_batch_list[i].type) { + case AUB_TRACE_VS_STATE: + dump_vs_state(brw, offset); + break; + case AUB_TRACE_GS_STATE: + dump_gs_state(brw, offset); + break; + case AUB_TRACE_CLIP_STATE: + dump_clip_state(brw, offset); + break; + case AUB_TRACE_SF_STATE: + dump_sf_state(brw, offset); + break; + case AUB_TRACE_WM_STATE: + dump_wm_state(brw, offset); + break; case AUB_TRACE_CLIP_VP_STATE: dump_clip_viewport_state(brw, offset); break; @@ -533,23 +605,6 @@ void brw_debug_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - if (intel->gen < 6) - state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, - sizeof(struct brw_vs_unit_state)); - - if (intel->gen < 6) - state_struct_out("GS", intel->batch.bo, brw->gs.state_offset, - sizeof(struct brw_gs_unit_state)); - - if (intel->gen < 6) { - state_struct_out("SF", intel->batch.bo, brw->sf.state_offset, - sizeof(struct brw_sf_unit_state)); - } - - if (intel->gen < 6) - state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, - sizeof(struct brw_wm_unit_state)); - drm_intel_bo_map(intel->batch.bo, false); dump_state_batch(brw); drm_intel_bo_unmap(intel->batch.bo); From f07cfebebe0b902431fb33104b70a531d57a0570 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 8 Jul 2011 14:15:57 -0700 Subject: [PATCH 083/110] i965/gen4: Add a stub dumper for CC unit state, which is different from gen6. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state_dump.c | 23 +++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index cc2c36c7e99..b9e5cc1a534 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -141,7 +141,7 @@ static void dump_sf_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 3, "thread3\n"); batch_out(brw, name, offset, 4, "thread4: %d threads\n", sf->thread4.max_threads + 1); - batch_out(brw, name, offset, 5, "sf5\n"); + batch_out(brw, name, offset, 5, "sf5: viewport offset\n"); batch_out(brw, name, offset, 6, "sf6\n"); batch_out(brw, name, offset, 7, "sf7\n"); } @@ -384,7 +384,21 @@ static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) ds->ds2.depth_write_enable ? "en" : "dis"); } -static void dump_cc_state(struct brw_context *brw, uint32_t offset) +static void dump_cc_state_gen4(struct brw_context *brw, uint32_t offset) +{ + const char *name = "CC"; + + batch_out(brw, name, offset, 0, "cc0\n"); + batch_out(brw, name, offset, 1, "cc1\n"); + batch_out(brw, name, offset, 2, "cc2\n"); + batch_out(brw, name, offset, 3, "cc3\n"); + batch_out(brw, name, offset, 4, "cc4: viewport offset\n"); + batch_out(brw, name, offset, 5, "cc5\n"); + batch_out(brw, name, offset, 6, "cc6\n"); + batch_out(brw, name, offset, 7, "cc7\n"); +} + +static void dump_cc_state_gen6(struct brw_context *brw, uint32_t offset) { const char *name = "CC"; struct gen6_color_calc_state *cc = brw->intel.batch.bo->virtual + offset; @@ -554,7 +568,10 @@ dump_state_batch(struct brw_context *brw) dump_depth_stencil_state(brw, offset); break; case AUB_TRACE_CC_STATE: - dump_cc_state(brw, offset); + if (intel->gen >= 6) + dump_cc_state_gen6(brw, offset); + else + dump_cc_state_gen4(brw, offset); break; case AUB_TRACE_BLEND_STATE: dump_blend_state(brw, offset); From 5edb3ddf4191b31b4a4e43a85fe1bfbd62a8cb17 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 29 Jun 2011 15:05:52 -0700 Subject: [PATCH 084/110] i965/gen7: Refactor SF setup a bit to handle overrides in one place. This is exactly analogous to Eric's Gen6 change in commit e7280b16d634e1f434bebbce83996b3d30d0419c. Signed-off-by: Eric Anholt Signed-off-by: Kenneth Graunke NOTE: This is a candidate for the 7.11 branch. --- src/mesa/drivers/dri/i965/gen7_sf_state.c | 43 +++++++++++++---------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 99efe96a1fa..aeeb4f26555 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -39,11 +39,12 @@ upload_sbe_state(struct brw_context *brw) uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); uint32_t dw1, dw10, dw11; int i; - int attr = 0; + int attr = 0, input_index = 0; /* _NEW_TRANSFORM */ int urb_start = ctx->Transform.ClipPlanesEnabled ? 2 : 1; /* _NEW_LIGHT */ int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + uint16_t attr_overrides[FRAG_ATTRIB_MAX]; /* FINISHME: Attribute Swizzle Control Mode? */ dw1 = @@ -71,30 +72,34 @@ upload_sbe_state(struct brw_context *brw) ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1)); } + /* Create the mapping from the FS inputs we produce to the VS outputs + * they source from. + */ + for (; attr < FRAG_ATTRIB_MAX; attr++) { + if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr))) + continue; + + /* The hardware can only do the overrides on 16 overrides at a + * time, and the other up to 16 have to be lined up so that the + * input index = the output index. We'll need to do some + * tweaking to make sure that's the case. + */ + assert(input_index < 16 || attr == input_index); + + attr_overrides[input_index++] = get_attr_override(brw, attr, + two_side_color); + } + + for (; attr < FRAG_ATTRIB_MAX; attr++) + attr_overrides[input_index++] = 0; + BEGIN_BATCH(14); OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); OUT_BATCH(dw1); /* Output dwords 2 through 9 */ for (i = 0; i < 8; i++) { - uint32_t attr_overrides = 0; - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color); - attr++; - break; - } - } - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; - attr++; - break; - } - } - OUT_BATCH(attr_overrides); + OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16); } OUT_BATCH(dw10); /* point sprite texcoord bitmask */ From 147d0102952b7c0b16906c08da2ae447ec68185a Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 29 Jun 2011 15:11:22 -0700 Subject: [PATCH 085/110] i965/gen7: Fix point sprite texture coordinate overrides. This is exactly analogous to Eric's Gen6 change in commit f304bb8a5d040d99db47a65813d216d11c66fb47. His explanation: "We were assuming that the input attribute n to the FS was FRAG_ATTRIB_TEXn, which happened to be true often enough for our testcases." Signed-off-by: Eric Anholt Signed-off-by: Kenneth Graunke NOTE: This is a candidate for the 7.11 branch. --- src/mesa/drivers/dri/i965/gen7_sf_state.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index aeeb4f26555..e36c44aa3fd 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -58,12 +58,6 @@ upload_sbe_state(struct brw_context *brw) dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; dw10 = 0; - if (ctx->Point.PointSprite) { - for (i = 0; i < 8; i++) { - if (ctx->Point.CoordReplace[i]) - dw10 |= (1 << i); - } - } /* _NEW_LIGHT (flat shading) */ dw11 = 0; @@ -79,6 +73,12 @@ upload_sbe_state(struct brw_context *brw) if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr))) continue; + if (ctx->Point.PointSprite && + attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7 && + ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) { + dw10 |= (1 << input_index); + } + /* The hardware can only do the overrides on 16 overrides at a * time, and the other up to 16 have to be lined up so that the * input index = the output index. We'll need to do some From 186e37c75454965e1a58298e878a9585f4866749 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 29 Jun 2011 15:18:55 -0700 Subject: [PATCH 086/110] i965/gen7: Add support for gl_PointCoord. This is exactly analogous to Eric's Gen6 change in commit 6861a701772eac3a6a7d3136d03efa7ac7e5c026. His explanation: "This is just like PointSprite overrides, but it's always on for that attribute." Fixes glsl-fs-pointcoord and gtf/point_sprites. Signed-off-by: Eric Anholt Signed-off-by: Kenneth Graunke NOTE: This is a candidate for the 7.11 branch. --- src/mesa/drivers/dri/i965/gen7_sf_state.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index e36c44aa3fd..0f97cea652d 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -79,6 +79,9 @@ upload_sbe_state(struct brw_context *brw) dw10 |= (1 << input_index); } + if (attr == FRAG_ATTRIB_PNTC) + dw10 |= (1 << input_index); + /* The hardware can only do the overrides on 16 overrides at a * time, and the other up to 16 have to be lined up so that the * input index = the output index. We'll need to do some From 7304909d6537e00252347a10d0bae54ffd77ff91 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 9 Jul 2011 00:20:34 -0700 Subject: [PATCH 087/110] glsl: Reject ambiguous function calls (multiple inexact matches). According to the GLSL 1.20 specification, "it is a semantic error if there are multiple ways to apply [implicit] conversions [...] such that the call can be made to match multiple signatures." Fixes a regression caused by 60eb63a855cb89962f2d5bb91e238ff2d1ab8702, which implemented the wrong policy of finding a "closest" match. However, this is not a revert, since the original code failed to continue looking for an exact match once it found two inexact matches. It's OK to have multiple inexact matches if there's also an exact match. NOTE: This is a candidate for the 7.10 and 7.11 branches. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38971 Reviewed-by: Eric Anholt Signed-off-by: Kenneth Graunke --- src/glsl/ir_function.cpp | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index 1255072a571..0f2f1a0eea4 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -165,8 +165,18 @@ ir_function_signature * ir_function::matching_signature(const exec_list *actual_parameters) { ir_function_signature *match = NULL; - int matched_score = 0; + bool multiple_inexact_matches = false; + /* From page 42 (page 49 of the PDF) of the GLSL 1.20 spec: + * + * "If an exact match is found, the other signatures are ignored, and + * the exact match is used. Otherwise, if no exact match is found, then + * the implicit conversions in Section 4.1.10 "Implicit Conversions" will + * be applied to the calling arguments if this can make their types match + * a signature. In this case, it is a semantic error if there are + * multiple ways to apply these conversions to the actual arguments of a + * call such that the call can be made to match multiple signatures." + */ foreach_iter(exec_list_iterator, iter, signatures) { ir_function_signature *const sig = (ir_function_signature *) iter.get(); @@ -178,13 +188,24 @@ ir_function::matching_signature(const exec_list *actual_parameters) if (score == 0) return sig; - /* If we found a match with fewer conversions, use that instead */ - if (score > 0 && (match == NULL || score < matched_score)) { - match = sig; - matched_score = score; + if (score > 0) { + if (match == NULL) + match = sig; + else + multiple_inexact_matches = true; } } + /* There is no exact match (we would have returned it by now). If there + * are multiple inexact matches, the call is ambiguous, which is an error. + * + * FINISHME: Report a decent error. Returning NULL will likely result in + * FINISHME: a "no matching signature" error; it should report that the + * FINISHME: call is ambiguous. But reporting errors from here is hard. + */ + if (multiple_inexact_matches) + return NULL; + return match; } From e4189f2e2e310276136dc06af20062986920e8e2 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 11 Jul 2011 12:48:06 -0700 Subject: [PATCH 088/110] gallivm: Remove LLVMOpaqueKindType case with llvm-3.0. llvm-3.0svn r134829 removed LLVMOpaqueKindType from enum LLVMTypeKind in include/llvm-c/Core.h. --- src/gallium/auxiliary/gallivm/lp_bld_type.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index c5cf6d4a6c4..11a2b055cc2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -325,8 +325,10 @@ lp_typekind_name(LLVMTypeKind t) return "LLVMArrayTypeKind"; case LLVMPointerTypeKind: return "LLVMPointerTypeKind"; +#if HAVE_LLVM < 0x0300 case LLVMOpaqueTypeKind: return "LLVMOpaqueTypeKind"; +#endif case LLVMVectorTypeKind: return "LLVMVectorTypeKind"; case LLVMMetadataTypeKind: From 278b832b785c0a76a6a367ce2f8807923c740208 Mon Sep 17 00:00:00 2001 From: David Heidelberger Date: Mon, 11 Jul 2011 21:50:24 +0200 Subject: [PATCH 089/110] nvfx: handle PIPE_CAP_SM3 Signed-off-by: David Heidelberger --- src/gallium/drivers/nvfx/nvfx_screen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 7a013a916b9..8569b5a190b 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -33,6 +33,9 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_GLSL: return 1; + case PIPE_CAP_SM3: + /* TODO: >= nv4x support Shader Model 3.0 */ + return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: From a09b7f7f150d1687a614fd59cf09ec5e3c3a13fd Mon Sep 17 00:00:00 2001 From: Andrew Randrianasulu Date: Mon, 11 Jul 2011 22:05:13 +0200 Subject: [PATCH 090/110] dri/nouveau: nv10: fix vertex format for GL_UNSIGNED_BYTE Broken accidentally in f4efc256fd90beaff86321e4c6ce00f9be55092d, the switch to rnn headers. NV10TCL_VTXFMT_TYPE_BYTE_RGBA became U8_UNORM but B8G8R8A8_UNORM was used instead. --- src/mesa/drivers/dri/nouveau/nv10_render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/nouveau/nv10_render.c b/src/mesa/drivers/dri/nouveau/nv10_render.c index 20fb4478426..6134650346d 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_render.c +++ b/src/mesa/drivers/dri/nouveau/nv10_render.c @@ -99,7 +99,7 @@ get_hw_format(int type) case GL_UNSIGNED_SHORT: return NV10_3D_VTXBUF_FMT_TYPE_V16_SNORM; case GL_UNSIGNED_BYTE: - return NV10_3D_VTXBUF_FMT_TYPE_B8G8R8A8_UNORM; + return NV10_3D_VTXBUF_FMT_TYPE_U8_UNORM; default: assert(0); } From 1844ae7e7e2285f52dc0ecd42194ccc47e3137ef Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 11 Jul 2011 14:08:24 -0700 Subject: [PATCH 091/110] gallivm: Re-enable LLVMUnionTypeKind case for llvm-2.7 only. LLVMUnionTypeKind is not in llvm-2.6, llvm-2.8, llvm-2.9, or llvm-3.0svn. --- src/gallium/auxiliary/gallivm/lp_bld_type.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index 11a2b055cc2..efd159f8869 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -333,10 +333,10 @@ lp_typekind_name(LLVMTypeKind t) return "LLVMVectorTypeKind"; case LLVMMetadataTypeKind: return "LLVMMetadataTypeKind"; - /* Only in LLVM 2.7 and later??? +#if HAVE_LLVM == 0x0207 case LLVMUnionTypeKind: return "LLVMUnionTypeKind"; - */ +#endif default: return "unknown LLVMTypeKind"; } From ab4d629613d5a7a8a94f3fe3f1acac347f39e2ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 19:54:27 +0200 Subject: [PATCH 092/110] st/mesa: derive a stencil sampler format from the actual texture format Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_cb_drawpixels.c | 70 +++++++++-------------- 1 file changed, 26 insertions(+), 44 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index dca3324645c..f25656acb49 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -272,26 +272,6 @@ make_passthrough_vertex_shader(struct st_context *st, } -/** - * Return a texture base format for drawing/copying an image - * of the given format. - */ -static GLenum -base_format(GLenum format) -{ - switch (format) { - case GL_DEPTH_COMPONENT: - return GL_DEPTH_COMPONENT; - case GL_DEPTH_STENCIL: - return GL_DEPTH_STENCIL; - case GL_STENCIL_INDEX: - return GL_STENCIL_INDEX; - default: - return GL_RGBA; - } -} - - /** * Return a texture internalFormat for drawing/copying an image * of the given format and type. @@ -999,7 +979,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; struct pipe_sampler_view *sv[2]; int num_sampler_view = 1; - enum pipe_format stencil_format = PIPE_FORMAT_NONE; struct st_fp_variant *fpv; if (format == GL_DEPTH_STENCIL) @@ -1014,28 +993,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, /* can we write to stencil if not fallback */ if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) goto stencil_fallback; - - tex_format = st_choose_format(st->pipe->screen, base_format(format), - GL_NONE, GL_NONE, - PIPE_TEXTURE_2D, - 0, PIPE_BIND_SAMPLER_VIEW); - - switch (tex_format) { - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - stencil_format = PIPE_FORMAT_X24S8_USCALED; - break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - stencil_format = PIPE_FORMAT_S8X24_USCALED; - break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: - stencil_format = PIPE_FORMAT_X32_S8X24_USCALED; - break; - case PIPE_FORMAT_S8_USCALED: - stencil_format = PIPE_FORMAT_S8_USCALED; - break; - default: - goto stencil_fallback; - } } /* Mesa state should be up to date by now */ @@ -1080,7 +1037,32 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, sv[0] = st_create_texture_sampler_view(st->pipe, pt); if (sv[0]) { - if (write_stencil) { + /* Create a second sampler view to read stencil. + * The stencil is written using the shader stencil export + * functionality. */ + if (write_stencil) { + enum pipe_format stencil_format = PIPE_FORMAT_NONE; + + switch (pt->format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_X24S8_USCALED: + stencil_format = PIPE_FORMAT_X24S8_USCALED; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_S8X24_USCALED: + stencil_format = PIPE_FORMAT_S8X24_USCALED; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + case PIPE_FORMAT_X32_S8X24_USCALED: + stencil_format = PIPE_FORMAT_X32_S8X24_USCALED; + break; + case PIPE_FORMAT_S8_USCALED: + stencil_format = PIPE_FORMAT_S8_USCALED; + break; + default: + assert(0); + } + sv[1] = st_create_texture_sampler_view_format(st->pipe, pt, stencil_format); num_sampler_view++; From 2df4b6117b27acb4ea46222b47353ffb9f513204 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 18:32:48 +0200 Subject: [PATCH 093/110] st/mesa: get rid of unnecessary 'goto' in DrawPixels Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_cb_drawpixels.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index f25656acb49..d3e6aefdb79 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -988,11 +988,12 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, else if (format == GL_DEPTH_COMPONENT) write_depth = GL_TRUE; - if (write_stencil) { - enum pipe_format tex_format; - /* can we write to stencil if not fallback */ - if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) - goto stencil_fallback; + if (write_stencil && + !pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) { + /* software fallback */ + draw_stencil_pixels(ctx, x, y, width, height, format, type, + unpack, pixels); + return; } /* Mesa state should be up to date by now */ @@ -1083,11 +1084,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, pipe_resource_reference(&pt, NULL); } } - return; - -stencil_fallback: - draw_stencil_pixels(ctx, x, y, width, height, format, type, - unpack, pixels); } From db311b45beb60630ad3e3c803631c2b6c1472347 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 22 Jun 2011 11:35:27 -0700 Subject: [PATCH 094/110] configure.ac: Make --{without,with}-gallium-drivers work as expected This version is mostly Dan's post to the mesa-dev mailing list on 6/22/2011. NOTE: This is a candidate for the 7.10 and 7.11 branches. Signed-off-by: Ian Romanick Reviewed-by: Dan Nicholson --- configure.ac | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/configure.ac b/configure.ac index 6a5bb37e6cd..77372735c1a 100644 --- a/configure.ac +++ b/configure.ac @@ -617,6 +617,13 @@ AC_ARG_WITH([gallium-drivers], [with_gallium_drivers="$withval"], [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"]) +# Doing '--without-gallium-drivers' will set this variable to 'no'. Clear it +# here so that the script doesn't choke on an unknown driver name later. +case "$with_gallium_drivers" in + yes) with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT" ;; + no) with_gallium_drivers='' ;; +esac + if test "x$enable_opengl" = xno -a \ "x$enable_gles1" = xno -a \ "x$enable_gles2" = xno -a \ From c369fb42ee00598396e16faf1cd98995bf72c0b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jul 2011 23:38:49 +0200 Subject: [PATCH 095/110] st/mesa: choose a matching depth internal format for DrawPixels This makes it easier to hit the fast path and get a float format when we ask for it. Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_cb_drawpixels.c | 29 +++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index d3e6aefdb79..1d908c0317a 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -281,11 +281,36 @@ internal_format(struct gl_context *ctx, GLenum format, GLenum type) { switch (format) { case GL_DEPTH_COMPONENT: - return GL_DEPTH_COMPONENT; + switch (type) { + case GL_UNSIGNED_SHORT: + return GL_DEPTH_COMPONENT16; + + case GL_UNSIGNED_INT: + return GL_DEPTH_COMPONENT32; + + case GL_FLOAT: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_COMPONENT32F; + else + return GL_DEPTH_COMPONENT; + + default: + return GL_DEPTH_COMPONENT; + } + case GL_DEPTH_STENCIL: - return GL_DEPTH_STENCIL; + switch (type) { + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + return GL_DEPTH32F_STENCIL8; + + case GL_UNSIGNED_INT_24_8: + default: + return GL_DEPTH24_STENCIL8; + } + case GL_STENCIL_INDEX: return GL_STENCIL_INDEX; + default: if (_mesa_is_integer_format(format)) { switch (type) { From 4ef9c3d21b65ff0a9280d3f3ad4c45f0442c6c4f Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 11 Jul 2011 15:58:29 +0100 Subject: [PATCH 096/110] autoconf: Do not select Xlib when building DRI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As Chia-I Wu said 'There are two libGL providers, Xlib and DRI based they cannot coexist' Signed-off-by: Emil Velikov Signed-off-by: Marek Olšák --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 77372735c1a..eace790d84d 100644 --- a/configure.ac +++ b/configure.ac @@ -798,7 +798,7 @@ esac if test "x$enable_dri" = xyes; then DRIVER_DIRS="$DRIVER_DIRS dri" - GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/xlib sw/dri" + GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/dri" GALLIUM_STATE_TRACKERS_DIRS="dri $GALLIUM_STATE_TRACKERS_DIRS" HAVE_ST_DRI="yes" fi From 1ab5e1524258042e26c769221f61da16403d0cf6 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 11 Jul 2011 15:42:15 +0100 Subject: [PATCH 097/110] st/mesa: check if _mesa_create_context() returns NULL In some cases _mesa_create_context() can return NULL an in the mesa state tracker, we do not concider the case, which may cause issues within st_create_context_priv() This patch adds a simple check (similar to the one in the dri drivers) Signed-off-by: Emil Velikov Signed-off-by: Brian Paul --- src/mesa/state_tracker/st_context.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 6eddbfc88e4..6d4bc544d0c 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -179,6 +179,9 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, st_init_driver_functions(&funcs); ctx = _mesa_create_context(api, visual, shareCtx, &funcs, NULL); + if (!ctx) { + return NULL; + } /* XXX: need a capability bit in gallium to query if the pipe * driver prefers DP4 or MUL/MAD for vertex transformation. From 8f6c207024c1458a70da3cbc9a9c9f48cf1a41df Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jul 2011 09:47:48 -0600 Subject: [PATCH 098/110] u_upload_mgr: add missing offset to src map in u_upload_buffer() Fixes regression from dda8d7ac3f0e5148d9738a57f7bc03216f6514d1 --- src/gallium/auxiliary/util/u_upload_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index e50db6d67fe..71fe53e3a27 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -257,7 +257,7 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, ret = u_upload_data( upload, min_out_offset, size, - map, + map + offset, out_offset, outbuf, flushed ); From 5d0d8366f9ab0ca4c52eb927ab476953daa77f1e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 12 Jul 2011 12:00:10 -0400 Subject: [PATCH 099/110] r600g: emit SQ_LDS_RESOURCE_MGMT Need to be initialized to a reasonable value as compute code may change it. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=39119 NOTE: This is a candidate for the 7.11 branch. Signed-off-by: Alex Deucher --- src/gallium/drivers/r600/evergreen_state.c | 5 +++++ src/gallium/drivers/r600/evergreend.h | 7 +++++++ src/gallium/winsys/r600/drm/evergreen_hw_context.c | 1 + 3 files changed, 13 insertions(+) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index fbf25feaf20..4605c833dea 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2026,6 +2026,11 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + tmp = 0; + tmp |= S_008E2C_NUM_PS_LDS(0x1000); + tmp |= S_008E2C_NUM_LS_LDS(0x1000); + r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index d795f5757ed..96dbd4da91b 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -216,6 +216,13 @@ #define S_008C28_NUM_LS_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) #define G_008C28_NUM_LS_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) #define C_008C28_NUM_LS_STACK_ENTRIES(x) 0xF000FFFF +#define R_008E2C_SQ_LDS_RESOURCE_MGMT 0x00008E2C +#define S_008E2C_NUM_PS_LDS(x) (((x) & 0xFFFF) << 0) +#define G_008E2C_NUM_PS_LDS(x) (((x) >> 0) & 0xFFFF) +#define C_008E2C_NUM_PS_LDS(x) 0x0000FFFF +#define S_008E2C_NUM_LS_LDS(x) (((x) & 0xFFFF) << 16) +#define G_008E2C_NUM_LS_LDS(x) (((x) >> 16) & 0xFFFF) +#define C_008E2C_NUM_LS_LDS(x) 0xFFFF0000 #define R_008CF0_SQ_MS_FIFO_SIZES 0x00008CF0 #define S_008CF0_CACHE_FIFO_SIZE(x) (((x) & 0xFF) << 0) diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 4d9dd505c41..60d2e289396 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -55,6 +55,7 @@ static const struct r600_reg evergreen_config_reg_list[] = { {R_008C24_SQ_STACK_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_008C28_SQ_STACK_RESOURCE_MGMT_3, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_008E2C_SQ_LDS_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, }; From aceb66951d467844f2d722cf5d6bd6c429207cd2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 11 Jul 2011 10:29:38 -0700 Subject: [PATCH 100/110] intel: Use _mesa_tex_target_to_face() helper function instead of our own. Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/intel/intel_tex_image.c | 25 +++----------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 269faefa1c0..1f8b885bbec 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -120,25 +120,6 @@ intel_miptree_create_for_teximage(struct intel_context *intel, expect_accelerated_upload); } - - - -static GLuint -target_to_face(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - return ((GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X); - default: - return 0; - } -} - /* There are actually quite a few combinations this will work for, * more than what I've listed here. */ @@ -426,7 +407,7 @@ intelTexImage(struct gl_context * ctx, DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = level; if (_mesa_is_format_compressed(texImage->TexFormat)) { @@ -835,7 +816,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->region->width, rb->region->height, 1, 0, internalFormat, texFormat); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = level; texImage->RowStride = rb->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); @@ -893,7 +874,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, image->region->width, image->region->height, 1, 0, image->internal_format, image->format); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = 0; texImage->RowStride = image->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); From 6e6b38860488a0b2b282866f095cea9860503a14 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 11 Jul 2011 11:32:04 -0700 Subject: [PATCH 101/110] i915: Fix map/unmap mismatches from leaving INTEL_FALLBACK during TNL. The first rendering after context create didn't know of the color buffer yet, triggering a sw fallback. The intel_prepare_render() from intelSpanRenderStart then found the buffer and turned off fallbacks, but intelSpanRenderFinish was never called and things were left mapped. By checking buffers before making the call on whether to do the fallback pipeline or not, we avoid the fallback change inside of the rendering pipeline. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=31561 Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i915/intel_tris.c | 13 +++++++++++++ src/mesa/drivers/dri/intel/intel_context.h | 1 + 2 files changed, 14 insertions(+) diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 7bcb72f42d0..941c4350ddd 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1078,6 +1078,13 @@ intelRunPipeline(struct gl_context * ctx) if (ctx->NewState) _mesa_update_state_locked(ctx); + /* We need to get this done before we start the pipeline, or a + * change in the INTEL_FALLBACK() of its intel_draw_buffers() call + * while the pipeline is running will result in mismatched swrast + * map/unmaps, and later assertion failures. + */ + intel_prepare_render(intel); + if (intel->NewGLState) { if (intel->NewGLState & _NEW_TEXTURE) { intel->vtbl.update_texture_state(intel); @@ -1092,7 +1099,9 @@ intelRunPipeline(struct gl_context * ctx) } intel_map_vertex_shader_textures(ctx); + intel->tnl_pipeline_running = true; _tnl_run_pipeline(ctx); + intel->tnl_pipeline_running = false; intel_unmap_vertex_shader_textures(ctx); _mesa_unlock_context_textures(ctx); @@ -1228,6 +1237,8 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) if (mode) { intel->Fallback |= bit; if (oldfallback == 0) { + assert(!intel->tnl_pipeline_running); + intel_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "ENTER FALLBACK %x: %s\n", @@ -1239,6 +1250,8 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) else { intel->Fallback &= ~bit; if (oldfallback == bit) { + assert(!intel->tnl_pipeline_running); + _swrast_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "LEAVE FALLBACK %s\n", getFallbackString(bit)); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 148fb0c2c9a..1727badb704 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -199,6 +199,7 @@ struct intel_context drm_intel_bo *first_post_swapbuffers_batch; GLboolean need_throttle; GLboolean no_batch_wrap; + bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. */ struct { From 9a82d89a8fafde1b65c0843e022e99864b6d56b1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 11 Jul 2011 11:34:56 -0700 Subject: [PATCH 102/110] i915: Use _mesa_get_format_name to describe translate_tex_format() fail. I don't want to go count up to what format number 29 is. Reviewed-by: Ian Romanick Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i915/i830_texstate.c | 3 ++- src/mesa/drivers/dri/i915/i915_texstate.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index d4af5e51026..71ce44fd5c9 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -76,7 +76,8 @@ translate_texture_format(GLuint mesa_format) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); + fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + _mesa_get_format_name(mesa_format)); abort(); return 0; } diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index bcf42d59969..90ebee3846d 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -89,7 +89,8 @@ translate_texture_format(gl_format mesa_format, GLenum DepthMode) else return (MAPSURF_32BIT | MT_32BIT_x8L24); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); + fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + _mesa_get_format_name(mesa_format)); abort(); return 0; } From 6aae729d6ec3cb2d5677120742c1180e38815482 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 11 Jul 2011 11:42:00 -0700 Subject: [PATCH 103/110] i915: Fix depth texturing since 86e62b2357447b7c97f434be4834f4b50aa0764d The 965 driver already had the X8_Z24 case, but 915 was missing it. Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i915/i915_texstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 90ebee3846d..7cd6820cd51 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -82,6 +82,7 @@ translate_texture_format(gl_format mesa_format, GLenum DepthMode) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); case MESA_FORMAT_S8_Z24: + case MESA_FORMAT_X8_Z24: if (DepthMode == GL_ALPHA) return (MAPSURF_32BIT | MT_32BIT_x8A24); else if (DepthMode == GL_INTENSITY) From 898be7d5acc27e2e0dfa8c2f27dc4fd7085fb476 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 11 Jul 2011 16:49:44 -0700 Subject: [PATCH 104/110] mesa: Fix assertion failure in X8_Z24/Z24_X8 texfetch. Reviewed-by: Ian Romanick --- src/mesa/main/texfetch_tmp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/texfetch_tmp.h b/src/mesa/main/texfetch_tmp.h index 3b1eedf39bf..d170adf2e00 100644 --- a/src/mesa/main/texfetch_tmp.h +++ b/src/mesa/main/texfetch_tmp.h @@ -2287,7 +2287,8 @@ static void FETCH(f_z24_s8)( const struct gl_texture_image *texImage, const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1); const GLfloat scale = 1.0F / (GLfloat) 0xffffff; texel[0] = ((*src) >> 8) * scale; - ASSERT(texImage->TexFormat == MESA_FORMAT_Z24_S8); + ASSERT(texImage->TexFormat == MESA_FORMAT_Z24_S8 || + texImage->TexFormat == MESA_FORMAT_Z24_X8); ASSERT(texel[0] >= 0.0F); ASSERT(texel[0] <= 1.0F); } @@ -2314,7 +2315,8 @@ static void FETCH(f_s8_z24)( const struct gl_texture_image *texImage, const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1); const GLfloat scale = 1.0F / (GLfloat) 0xffffff; texel[0] = ((*src) & 0x00ffffff) * scale; - ASSERT(texImage->TexFormat == MESA_FORMAT_S8_Z24); + ASSERT(texImage->TexFormat == MESA_FORMAT_S8_Z24 || + texImage->TexFormat == MESA_FORMAT_X8_Z24); ASSERT(texel[0] >= 0.0F); ASSERT(texel[0] <= 1.0F); } From f2fd0d63046c41559c5dfca9ebdc5d33c0ae4177 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 11 Jul 2011 16:50:06 -0700 Subject: [PATCH 105/110] i915: Fix NPOT compressed textures on 915. We were failing at rounding, misplacing the non-baselevels. Fixes: 3DFX_texture_compression_FXT1/fbo-generate-mipmaps ARB_texture_compression/fbo-generate-mipmaps EXT_texture_compression_s3tc/fbo-generate-mipmaps Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i915/i915_tex_layout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c index 6e4512129cd..e6a47116223 100644 --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c @@ -219,9 +219,9 @@ i915_miptree_layout_2d(struct intel_context *intel, width, height, 1); if (mt->compressed) - img_height = MAX2(1, height / 4); + img_height = ALIGN(height, 4) / 4; else - img_height = (MAX2(2, height) + 1) & ~1; + img_height = ALIGN(height, 2); mt->total_height += img_height; From ed570cb5e52021cfeb36266189367c85befcc761 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 13 Jul 2011 00:09:36 +0200 Subject: [PATCH 106/110] i915g: fixup context desdruction Reported-by: Christopher Egert Signed-off-by: Daniel Vetter --- src/gallium/drivers/i915/i915_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 28ff40a2328..1b30309bb58 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -99,11 +99,11 @@ static void i915_destroy(struct pipe_context *pipe) struct i915_context *i915 = i915_context(pipe); int i; - draw_destroy(i915->draw); - if (i915->blitter) util_blitter_destroy(i915->blitter); + draw_destroy(i915->draw); + if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); From 556a47a2621073185be83a0a721a8ba93392bedb Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 12 Jul 2011 15:31:39 -0700 Subject: [PATCH 107/110] i915: Add support for gl_FragData[0] for output color. We advertised ARB_draw_buffers, but either fell back to software when using this output, or assertion failed. Fixes glsl-fs-fragdata-1, and failures in some webgl conformance tests. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39024 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34906 --- src/mesa/drivers/dri/i915/i915_fragprog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index b67ebb9a1ec..e9e8078328a 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -210,6 +210,7 @@ get_result_vector(struct i915_fragment_program *p, case PROGRAM_OUTPUT: switch (inst->DstReg.Index) { case FRAG_RESULT_COLOR: + case FRAG_RESULT_DATA0: return UREG(REG_TYPE_OC, 0); case FRAG_RESULT_DEPTH: p->depth_written = 1; From 5fe5d236c26b3b2428bc7395304e40cf21d3d3e1 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 13 Jul 2011 15:25:46 +0800 Subject: [PATCH 108/110] targets/egl-static: fix a linking error rbug is always linked in and it needs libpthread. --- src/gallium/targets/egl-static/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile index 832d7ba438f..69e7eecdf0c 100644 --- a/src/gallium/targets/egl-static/Makefile +++ b/src/gallium/targets/egl-static/Makefile @@ -42,7 +42,7 @@ egl_CPPFLAGS += \ -I$(TOP)/src/egl/main \ -D_EGL_MAIN=_eglMain egl_LIBS += $(TOP)/src/gallium/state_trackers/egl/libegl.a -egl_SYS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) -lEGL -lm +egl_SYS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) -lEGL -lm -lpthread # EGL platforms ifneq ($(findstring x11, $(EGL_PLATFORMS)),) From a0a22fead5e08b9ebfd2b64e6262e1e803a8e927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 13 Jul 2011 16:35:19 +0200 Subject: [PATCH 109/110] r600g: prevent hardware blitting based on resource usage It doesn't make much sense for STAGING and STREAM resources to be hardware blitted into VRAM. --- src/gallium/drivers/r600/r600_texture.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 37e75be6cf2..10c32c53a6d 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -313,7 +313,14 @@ static boolean permit_hardware_blit(struct pipe_screen *screen, PIPE_BIND_SAMPLER_VIEW)) return FALSE; - return TRUE; + switch (res->usage) { + case PIPE_USAGE_STREAM: + case PIPE_USAGE_STAGING: + return FALSE; + + default: + return TRUE; + } } static boolean r600_texture_get_handle(struct pipe_screen* screen, From 4f4855b249cbcb77900e9767041becf255afbba1 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Wed, 13 Jul 2011 09:01:04 +0400 Subject: [PATCH 110/110] st/mesa: flush bitmap cache on query and conditional render boundaries Bitmap caching shouldn't affect the results of the queries and conditional render. NOTE: This is a candidate for the 7.11 branch. Signed-off-by: Vadim Girlin Signed-off-by: Brian Paul --- src/mesa/state_tracker/st_cb_condrender.c | 5 +++++ src/mesa/state_tracker/st_cb_queryobj.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/mesa/state_tracker/st_cb_condrender.c b/src/mesa/state_tracker/st_cb_condrender.c index 64c6c117fca..1ced560e160 100644 --- a/src/mesa/state_tracker/st_cb_condrender.c +++ b/src/mesa/state_tracker/st_cb_condrender.c @@ -41,6 +41,7 @@ #include "st_context.h" #include "st_cb_queryobj.h" #include "st_cb_condrender.h" +#include "st_cb_bitmap.h" /** @@ -55,6 +56,8 @@ st_BeginConditionalRender(struct gl_context *ctx, struct gl_query_object *q, struct pipe_context *pipe = st->pipe; uint m; + st_flush_bitmap_cache(st); + switch (mode) { case GL_QUERY_WAIT: m = PIPE_RENDER_COND_WAIT; @@ -90,6 +93,8 @@ st_EndConditionalRender(struct gl_context *ctx, struct gl_query_object *q) struct pipe_context *pipe = st->pipe; (void) q; + st_flush_bitmap_cache(st); + pipe->render_condition(pipe, NULL, 0); st->render_condition = NULL; } diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c index d0ac253bcec..057499615bf 100644 --- a/src/mesa/state_tracker/st_cb_queryobj.c +++ b/src/mesa/state_tracker/st_cb_queryobj.c @@ -41,6 +41,7 @@ #include "pipe/p_defines.h" #include "st_context.h" #include "st_cb_queryobj.h" +#include "st_cb_bitmap.h" #if FEATURE_queryobj @@ -83,6 +84,8 @@ st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q) struct st_query_object *stq = st_query_object(q); unsigned type; + st_flush_bitmap_cache(st_context(ctx)); + /* convert GL query type to Gallium query type */ switch (q->Target) { case GL_ANY_SAMPLES_PASSED: @@ -128,6 +131,8 @@ st_EndQuery(struct gl_context *ctx, struct gl_query_object *q) struct pipe_context *pipe = st_context(ctx)->pipe; struct st_query_object *stq = st_query_object(q); + st_flush_bitmap_cache(st_context(ctx)); + pipe->end_query(pipe, stq->pq); }