i965/fs: Factor out universally broken calculation of the register component size.

This in principle simple calculation was being open-coded in a number
of places (in a series I haven't yet sent for review there will be a
couple more), all of them were subtly broken in one way or another:
None of them were handling the HW_REG case correctly as pointed out by
Connor, and fs_inst::regs_read() was handling the stride=0 case rather
naively.  This patch solves both problems and factors out the
calculation as a new fs_reg method.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
This commit is contained in:
Francisco Jerez 2015-07-14 15:43:44 +03:00
parent b00cd6e4a0
commit 4bddd82bf3
4 changed files with 23 additions and 12 deletions

View file

@ -78,8 +78,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
case HW_REG:
case MRF:
case ATTR:
this->regs_written =
DIV_ROUND_UP(MAX2(exec_size * dst.stride, 1) * type_sz(dst.type), 32);
this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
REG_SIZE);
break;
case BAD_FILE:
this->regs_written = 0;
@ -443,6 +443,15 @@ fs_reg::is_contiguous() const
return stride == 1;
}
unsigned
fs_reg::component_size(unsigned width) const
{
const unsigned stride = (file != HW_REG ? this->stride :
fixed_hw_reg.hstride == 0 ? 0 :
1 << (fixed_hw_reg.hstride - 1));
return MAX2(width * stride, 1) * type_sz(type);
}
int
fs_visitor::type_size(const struct glsl_type *type)
{
@ -702,12 +711,8 @@ fs_inst::regs_read(int arg) const
return 1;
case GRF:
case HW_REG:
if (src[arg].stride == 0) {
return 1;
} else {
int size = components * this->exec_size * type_sz(src[arg].type);
return DIV_ROUND_UP(size * src[arg].stride, 32);
}
return DIV_ROUND_UP(components * src[arg].component_size(exec_size),
REG_SIZE);
case MRF:
unreachable("MRF registers are not allowed as sources");
default:

View file

@ -70,14 +70,14 @@ offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
break;
case GRF:
case MRF:
case HW_REG:
case ATTR:
return byte_offset(reg,
delta * MAX2(bld.dispatch_width() * reg.stride, 1) *
type_sz(reg.type));
delta * reg.component_size(bld.dispatch_width()));
case UNIFORM:
reg.reg_offset += delta;
break;
default:
case IMM:
assert(delta == 0);
}
return reg;

View file

@ -1601,7 +1601,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
/* If the instruction writes to more than one register, it needs to
* be a "compressed" instruction on Gen <= 5.
*/
if (inst->exec_size * inst->dst.stride * type_sz(inst->dst.type) > 32)
if (inst->dst.component_size(inst->exec_size) > REG_SIZE)
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
else
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);

View file

@ -48,6 +48,12 @@ public:
bool equals(const fs_reg &r) const;
bool is_contiguous() const;
/**
* Return the size in bytes of a single logical component of the
* register assuming the given execution width.
*/
unsigned component_size(unsigned width) const;
/** Smear a channel of the reg to all channels. */
fs_reg &set_smear(unsigned subreg);