mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-23 17:00:22 +01:00
intel/fs: Pass builders instead of blocks into emit_[un]zip
This makes it far more explicit where we're inserting the instructions
rather than the magic "before and after" stuff that the emit_[un]zip
helpers did based on block and inst.
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Cc: mesa-stable@lists.freedesktop.org
(cherry picked from commit fcd4adb9d0)
This commit is contained in:
parent
9db1478039
commit
b0b9db69d8
1 changed files with 35 additions and 26 deletions
|
|
@ -5021,12 +5021,10 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i)
|
|||
/**
|
||||
* Extract the data that would be consumed by the channel group given by
|
||||
* lbld.group() from the i-th source region of instruction \p inst and return
|
||||
* it as result in packed form. If any copy instructions are required they
|
||||
* will be emitted before the given \p inst in \p block.
|
||||
* it as result in packed form.
|
||||
*/
|
||||
static fs_reg
|
||||
emit_unzip(const fs_builder &lbld, bblock_t *block, fs_inst *inst,
|
||||
unsigned i)
|
||||
emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
|
||||
{
|
||||
/* Specified channel group from the source region. */
|
||||
const fs_reg src = horiz_offset(inst->src[i], lbld.group());
|
||||
|
|
@ -5041,8 +5039,7 @@ emit_unzip(const fs_builder &lbld, bblock_t *block, fs_inst *inst,
|
|||
const fs_reg tmp = lbld.vgrf(inst->src[i].type, inst->components_read(i));
|
||||
|
||||
for (unsigned k = 0; k < inst->components_read(i); ++k)
|
||||
cbld.at(block, inst)
|
||||
.MOV(offset(tmp, lbld, k), offset(src, inst->exec_size, k));
|
||||
cbld.MOV(offset(tmp, lbld, k), offset(src, inst->exec_size, k));
|
||||
|
||||
return tmp;
|
||||
|
||||
|
|
@ -5108,40 +5105,51 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
|
|||
/**
|
||||
* Insert data from a packed temporary into the channel group given by
|
||||
* lbld.group() of the destination region of instruction \p inst and return
|
||||
* the temporary as result. If any copy instructions are required they will
|
||||
* be emitted around the given \p inst in \p block.
|
||||
* the temporary as result. Any copy instructions that are required for
|
||||
* unzipping the previous value (in the case of partial writes) will be
|
||||
* inserted using \p lbld_before and any copy instructions required for
|
||||
* zipping up the destination of \p inst will be inserted using \p lbld_after.
|
||||
*/
|
||||
static fs_reg
|
||||
emit_zip(const fs_builder &lbld, bblock_t *block, fs_inst *inst)
|
||||
emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
|
||||
fs_inst *inst)
|
||||
{
|
||||
/* Builder of the right width to perform the copy avoiding uninitialized
|
||||
* data if the lowered execution size is greater than the original
|
||||
* execution size of the instruction.
|
||||
*/
|
||||
const fs_builder cbld = lbld.group(MIN2(lbld.dispatch_width(),
|
||||
inst->exec_size), 0);
|
||||
assert(lbld_before.dispatch_width() == lbld_after.dispatch_width());
|
||||
assert(lbld_before.group() == lbld_after.group());
|
||||
|
||||
/* Specified channel group from the destination region. */
|
||||
const fs_reg dst = horiz_offset(inst->dst, lbld.group());
|
||||
const fs_reg dst = horiz_offset(inst->dst, lbld_after.group());
|
||||
const unsigned dst_size = inst->size_written /
|
||||
inst->dst.component_size(inst->exec_size);
|
||||
|
||||
if (needs_dst_copy(lbld, inst)) {
|
||||
const fs_reg tmp = lbld.vgrf(inst->dst.type, dst_size);
|
||||
if (needs_dst_copy(lbld_after, inst)) {
|
||||
const fs_reg tmp = lbld_after.vgrf(inst->dst.type, dst_size);
|
||||
|
||||
if (inst->predicate) {
|
||||
/* Handle predication by copying the original contents of
|
||||
* the destination into the temporary before emitting the
|
||||
* lowered instruction.
|
||||
*/
|
||||
for (unsigned k = 0; k < dst_size; ++k)
|
||||
cbld.at(block, inst)
|
||||
.MOV(offset(tmp, lbld, k), offset(dst, inst->exec_size, k));
|
||||
const fs_builder gbld_before =
|
||||
lbld_before.group(MIN2(lbld_before.dispatch_width(),
|
||||
inst->exec_size), 0);
|
||||
for (unsigned k = 0; k < dst_size; ++k) {
|
||||
gbld_before.MOV(offset(tmp, lbld_before, k),
|
||||
offset(dst, inst->exec_size, k));
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < dst_size; ++k)
|
||||
cbld.at(block, inst->next)
|
||||
.MOV(offset(dst, inst->exec_size, k), offset(tmp, lbld, k));
|
||||
const fs_builder gbld_after =
|
||||
lbld_after.group(MIN2(lbld_after.dispatch_width(),
|
||||
inst->exec_size), 0);
|
||||
for (unsigned k = 0; k < dst_size; ++k) {
|
||||
/* Use a builder of the right width to perform the copy avoiding
|
||||
* uninitialized data if the lowered execution size is greater than
|
||||
* the original execution size of the instruction.
|
||||
*/
|
||||
gbld_after.MOV(offset(dst, inst->exec_size, k),
|
||||
offset(tmp, lbld_after, k));
|
||||
}
|
||||
|
||||
return tmp;
|
||||
|
||||
|
|
@ -5197,9 +5205,10 @@ fs_visitor::lower_simd_width()
|
|||
const fs_builder lbld = ibld.group(lower_width, i);
|
||||
|
||||
for (unsigned j = 0; j < inst->sources; j++)
|
||||
split_inst.src[j] = emit_unzip(lbld, block, inst, j);
|
||||
split_inst.src[j] = emit_unzip(lbld.at(block, inst), inst, j);
|
||||
|
||||
split_inst.dst = emit_zip(lbld, block, inst);
|
||||
split_inst.dst = emit_zip(lbld.at(block, inst),
|
||||
lbld.at(block, inst->next), inst);
|
||||
split_inst.size_written =
|
||||
split_inst.dst.component_size(lower_width) * dst_size;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue