mirror of https://gitlab.freedesktop.org/mesa/mesa.git
i965/vec4: make offset() work in terms of a simd width and scalar components
So that it has the same semantics as the scalar backend implementation. The
helper now takes a SIMD width (always 8 in vec4 mode) and steps over as many
scalar components as that width specifies, respecting the size of the scalar
channels.
v2 (Curro):
- Remove the assertion in offset(), byte_offset() has the same checks.
- Use byte_offset() directly instead of add_byte_offset().
- Make things more clear by explicitly including the vertical stride
in the byte offset expression.
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
commit 66fcfa6894 (parent ba63db1f2e)
3 changed files with 16 additions and 18 deletions
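
Before the diff, a small standalone sketch of the arithmetic the reworked
helper performs may be useful. This is not Mesa code, only an illustration
assuming REG_SIZE is 32 bytes and the MAX2()/type_sz() semantics shown in the
hunks below; the names (offset_bytes, file_kind) are hypothetical.

   // Standalone illustration (not the Mesa sources) of the byte offset that
   // the reworked offset(reg, width, delta) adds for a register whose scalar
   // channels are type_size bytes wide.  Hypothetical names throughout.
   #include <algorithm>
   #include <cstdio>

   enum file_kind { UNIFORM, GRF };

   static unsigned
   offset_bytes(file_kind file, unsigned width, unsigned type_size,
                unsigned delta)
   {
      const unsigned stride = (file == UNIFORM ? 0 : 4);
      const unsigned num_components = std::max(width / 4 * stride, 4u);
      return num_components * type_size * delta;
   }

   int main()
   {
      // With width = 8 (always the case in vec4 mode) a GRF step covers
      // 8 float channels (32 bytes, matching the old delta * REG_SIZE) or
      // 8 double channels (64 bytes), while a UNIFORM step stays at one
      // vec4: 16 bytes for floats, matching the old delta * 16.
      std::printf("GRF float:      %u\n", offset_bytes(GRF, 8, 4, 1));     // 32
      std::printf("GRF double:     %u\n", offset_bytes(GRF, 8, 8, 1));     // 64
      std::printf("UNIFORM float:  %u\n", offset_bytes(UNIFORM, 8, 4, 1)); // 16
      std::printf("UNIFORM double: %u\n", offset_bytes(UNIFORM, 8, 8, 1)); // 32
      return 0;
   }
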
@@ -104,12 +104,11 @@ byte_offset(src_reg reg, unsigned bytes)
 }
 
 static inline src_reg
-offset(src_reg reg, unsigned delta)
+offset(src_reg reg, unsigned width, unsigned delta)
 {
-   assert(delta == 0 ||
-          (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
-   reg.offset += delta * (reg.file == UNIFORM ? 16 : REG_SIZE);
-   return reg;
+   const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
+   const unsigned num_components = MAX2(width / 4 * stride, 4);
+   return byte_offset(reg, num_components * type_sz(reg.type) * delta);
 }
 
 /**
@@ -180,12 +179,11 @@ byte_offset(dst_reg reg, unsigned bytes)
 }
 
 static inline dst_reg
-offset(dst_reg reg, unsigned delta)
+offset(dst_reg reg, unsigned width, unsigned delta)
 {
-   assert(delta == 0 ||
-          (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
-   reg.offset += delta * (reg.file == UNIFORM ? 16 : REG_SIZE);
-   return reg;
+   const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
+   const unsigned num_components = MAX2(width / 4 * stride, 4);
+   return byte_offset(reg, num_components * type_sz(reg.type) * delta);
 }
 
 static inline dst_reg
@@ -256,7 +256,7 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
    dst_reg reg;
 
    reg = v->nir_locals[nir_reg->index];
-   reg = offset(reg, base_offset);
+   reg = offset(reg, 8, base_offset);
    if (indirect) {
       reg.reladdr =
          new(v->mem_ctx) src_reg(v->get_nir_src(*indirect,
@@ -42,9 +42,9 @@ namespace {
                                       DIV_ROUND_UP(size * dst_stride, 4));
 
          for (unsigned i = 0; i < size; ++i)
-            bld.MOV(writemask(offset(dst, i * dst_stride / 4),
+            bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
                               1 << (i * dst_stride % 4)),
-                    swizzle(offset(src, i * src_stride / 4),
+                    swizzle(offset(src, 8, i * src_stride / 4),
                             brw_swizzle_for_mask(1 << (i * src_stride % 4))));
 
         return src_reg(dst);
@@ -124,16 +124,16 @@ namespace brw {
          unsigned n = 0;
 
          if (header_sz)
-            bld.exec_all().MOV(offset(payload, n++),
+            bld.exec_all().MOV(offset(payload, 8, n++),
                                retype(header, BRW_REGISTER_TYPE_UD));
 
          for (unsigned i = 0; i < addr_sz; i++)
-            bld.MOV(offset(payload, n++),
-                    offset(retype(addr, BRW_REGISTER_TYPE_UD), i));
+            bld.MOV(offset(payload, 8, n++),
+                    offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
 
          for (unsigned i = 0; i < src_sz; i++)
-            bld.MOV(offset(payload, n++),
-                    offset(retype(src, BRW_REGISTER_TYPE_UD), i));
+            bld.MOV(offset(payload, 8, n++),
+                    offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
 
          /* Reduce the dynamically uniform surface index to a single
           * scalar.