From cb415d4df644a8caffe861626dec5f7aa4cefa49 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 1 Oct 2012 15:28:55 -0700 Subject: [PATCH] i965/vs: Fix unit mismatch in scratch base_offset parameter. move_grf_array_access_to_scratch() calculates scratch buffer offsets in bytes. However, emit_scratch_read/write() expects the base_offset parameter to be measured in OWords. As a result, a shader using a scratch read/write offset greater than zero (in practice, a shader containing more than one variable in scratch) would use too large an offset, frequently exceeding the available scratch space. This patch corrects the mismatch by removing spurious conversion from OWords to bytes in move_grf_array_access_to_scratch(). This is based on a patch by Paul Berry. NOTE: This is a candidate for stable release branches. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt (cherry picked from commit 46e529672bb124b78eb454cbf55c72074ef6d35c) --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 8 ++++++-- src/mesa/drivers/dri/i965/brw_vs.c | 2 +- src/mesa/drivers/dri/i965/brw_vs.h | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 682837ffa6a..59428a19210 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2443,6 +2443,8 @@ vec4_visitor::get_pull_constant_offset(vec4_instruction *inst, /** * Emits an instruction before @inst to load the value named by @orig_src * from scratch space at @base_offset to @temp. + * + * @base_offset is measured in 32-byte units (the size of a register). */ void vec4_visitor::emit_scratch_read(vec4_instruction *inst, @@ -2458,6 +2460,8 @@ vec4_visitor::emit_scratch_read(vec4_instruction *inst, /** * Emits an instruction after @inst to store the value to be written * to @orig_dst to scratch space at @base_offset, from @temp. + * + * @base_offset is measured in 32-byte units (the size of a register). */ void vec4_visitor::emit_scratch_write(vec4_instruction *inst, @@ -2501,7 +2505,7 @@ vec4_visitor::move_grf_array_access_to_scratch() if (inst->dst.file == GRF && inst->dst.reladdr && scratch_loc[inst->dst.reg] == -1) { scratch_loc[inst->dst.reg] = c->last_scratch; - c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4; + c->last_scratch += this->virtual_grf_sizes[inst->dst.reg]; } for (int i = 0 ; i < 3; i++) { @@ -2510,7 +2514,7 @@ vec4_visitor::move_grf_array_access_to_scratch() if (src->file == GRF && src->reladdr && scratch_loc[src->reg] == -1) { scratch_loc[src->reg] = c->last_scratch; - c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4; + c->last_scratch += this->virtual_grf_sizes[src->reg]; } } } diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 710ffe858c6..071312c5b3d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -263,7 +263,7 @@ do_vs_prog(struct brw_context *brw, "Try reducing the number of live vec4 values to " "improve performance.\n"); - c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch); + c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch*REG_SIZE); brw_get_scratch_bo(intel, &brw->vs.scratch_bo, c.prog_data.total_scratch * brw->max_vs_threads); diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index a68a620b1f2..af562adf271 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -93,7 +93,7 @@ struct brw_vs_compile { GLuint nr_inputs; GLuint first_output; - GLuint last_scratch; + GLuint last_scratch; /**< measured in 32-byte (register size) units */ GLuint first_tmp; GLuint last_tmp;