mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-05 03:40:28 +01:00
i965/fs: Extend region width calculation to allow arbitrary execution sizes.
Instead of just halving the execution size when the instruction is compressed and hoping that this yields a legal source region width, we can calculate the maximum legal width value in closed form from the component size and stride. This makes sure that brw_reg_from_fs_reg() always returns a valid hardware region, even for virtual 32-wide instructions (e.g. send-like instructions) that would seem to exceed the hardware region width limit after halving. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
parent
dabaf4fb96
commit
0b4cd91071
1 changed file with 23 additions and 16 deletions
|
|
@ -65,27 +65,34 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen, bool compressed)
|
|||
case VGRF:
|
||||
if (reg->stride == 0) {
|
||||
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
|
||||
} else if (!compressed &&
|
||||
inst->exec_size * reg->stride * type_sz(reg->type) <= 32) {
|
||||
brw_reg = brw_vecn_reg(inst->exec_size, brw_file_from_reg(reg),
|
||||
reg->nr, 0);
|
||||
brw_reg = stride(brw_reg, inst->exec_size * reg->stride,
|
||||
inst->exec_size, reg->stride);
|
||||
} else {
|
||||
/* From the Haswell PRM:
|
||||
*
|
||||
* VertStride must be used to cross GRF register boundaries. This
|
||||
* rule implies that elements within a 'Width' cannot cross GRF
|
||||
* boundaries.
|
||||
* "VertStride must be used to cross GRF register boundaries. This
|
||||
* rule implies that elements within a 'Width' cannot cross GRF
|
||||
* boundaries."
|
||||
*
|
||||
* So, for registers that are large enough, we have to split the exec
|
||||
* size in two and trust the compression state to sort it out.
|
||||
* The maximum width value that could satisfy this restriction is:
|
||||
*/
|
||||
assert(inst->exec_size / 2 * reg->stride * type_sz(reg->type) <= 32);
|
||||
brw_reg = brw_vecn_reg(inst->exec_size / 2, brw_file_from_reg(reg),
|
||||
reg->nr, 0);
|
||||
brw_reg = stride(brw_reg, inst->exec_size / 2 * reg->stride,
|
||||
inst->exec_size / 2, reg->stride);
|
||||
const unsigned reg_width = REG_SIZE / (reg->stride * type_sz(reg->type));
|
||||
|
||||
/* Because the hardware can only split source regions at a whole
|
||||
* multiple of width during decompression (i.e. vertically), clamp
|
||||
* the value obtained above to the physical execution size of a
|
||||
* single decompressed chunk of the instruction:
|
||||
*/
|
||||
const unsigned phys_width = compressed ? inst->exec_size / 2 :
|
||||
inst->exec_size;
|
||||
|
||||
/* XXX - The equation above is strictly speaking not correct on
|
||||
* hardware that supports unbalanced GRF writes -- On Gen9+
|
||||
* each decompressed chunk of the instruction may have a
|
||||
* different execution size when the number of components
|
||||
* written to each destination GRF is not the same.
|
||||
*/
|
||||
const unsigned width = MIN2(reg_width, phys_width);
|
||||
brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0);
|
||||
brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride);
|
||||
}
|
||||
|
||||
brw_reg = retype(brw_reg, reg->type);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue