mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 22:20:14 +01:00
This rolls back the revert of this same patch introduced in
commit 7b9c15628a.
It also squashes the following patch to prevent a piglit regression caused
by this change:
intel/compiler: Fix lower_conversions for 8-bit types.
Author: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
For 8-bit types the execution type is word. A byte raw MOV has 16-bit
execution type and 8-bit destination and it shouldn't be considered
a conversion case. So there is no need to change alignment and enter
in lower_conversions for these instructions.
Fixes a regression in the piglit test "glsl-fs-shader-stencil-export"
that is introduced with this patch from the Vulkan shaderInt16 series:
'i965/compiler: handle conversion to smaller type in the lowering
pass for that'. The problem is caused because there is already a case
in the driver that injects Byte instructions like this:
mov(8) g127<1>UB g2<32,8,4>UB
And the aforementioned pass was not accounting for the special
handling of the execution size of Byte instructions. This patch
fixes this.
v2: (Jason Ekstrand)
- Simplify is_byte_raw_mov, include a reference to the PRM, and do not
consider B <-> UB conversions as raw movs.
v3: (Matt Turner)
- Indentation style fixes.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106393
Tested-by: Mark Janes <mark.a.janes@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
129 lines
4.7 KiB
C++
129 lines
4.7 KiB
C++
/*
|
|
* Copyright © 2015 Connor Abbott
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "brw_fs.h"
|
|
#include "brw_cfg.h"
|
|
#include "brw_fs_builder.h"
|
|
|
|
using namespace brw;
|
|
|
|
static bool
|
|
supports_type_conversion(const fs_inst *inst) {
|
|
switch (inst->opcode) {
|
|
case BRW_OPCODE_MOV:
|
|
case SHADER_OPCODE_MOV_INDIRECT:
|
|
return true;
|
|
case BRW_OPCODE_SEL:
|
|
return inst->dst.type == get_exec_type(inst);
|
|
default:
|
|
/* FIXME: We assume the opcodes don't explicitly mentioned
|
|
* before just work fine with arbitrary conversions.
|
|
*/
|
|
return true;
|
|
}
|
|
}
|
|
|
|
/* From the SKL PRM Vol 2a, "Move":
|
|
*
|
|
* "A mov with the same source and destination type, no source modifier,
|
|
* and no saturation is a raw move. A packed byte destination region (B
|
|
* or UB type with HorzStride == 1 and ExecSize > 1) can only be written
|
|
* using raw move."
|
|
*/
|
|
static bool
|
|
is_byte_raw_mov (const fs_inst *inst)
|
|
{
|
|
return type_sz(inst->dst.type) == 1 &&
|
|
inst->opcode == BRW_OPCODE_MOV &&
|
|
inst->src[0].type == inst->dst.type &&
|
|
!inst->saturate &&
|
|
!inst->src[0].negate &&
|
|
!inst->src[0].abs;
|
|
}
|
|
|
|
bool
|
|
fs_visitor::lower_conversions()
|
|
{
|
|
bool progress = false;
|
|
|
|
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
|
const fs_builder ibld(this, block, inst);
|
|
fs_reg dst = inst->dst;
|
|
bool saturate = inst->saturate;
|
|
|
|
if (supports_type_conversion(inst)) {
|
|
if (type_sz(inst->dst.type) < get_exec_type_size(inst) &&
|
|
!is_byte_raw_mov(inst)) {
|
|
/* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
|
|
* Single Precision Float":
|
|
*
|
|
* The upper Dword of every Qword will be written with undefined
|
|
* value when converting DF to F.
|
|
*
|
|
* So we need to allocate a temporary that's two registers, and then do
|
|
* a strided MOV to get the lower DWord of every Qword that has the
|
|
* result.
|
|
*
|
|
* This restriction applies, in general, whenever we convert to
|
|
* a type with a smaller bit-size.
|
|
*/
|
|
fs_reg temp = ibld.vgrf(get_exec_type(inst));
|
|
fs_reg strided_temp = subscript(temp, dst.type, 0);
|
|
|
|
assert(inst->size_written == inst->dst.component_size(inst->exec_size));
|
|
inst->dst = strided_temp;
|
|
inst->saturate = false;
|
|
/* As it is an strided destination, we write n-times more being n the
|
|
* size ratio between source and destination types. Update
|
|
* size_written accordingly.
|
|
*/
|
|
inst->size_written = inst->dst.component_size(inst->exec_size);
|
|
ibld.at(block, inst->next).MOV(dst, strided_temp)->saturate = saturate;
|
|
|
|
progress = true;
|
|
}
|
|
} else {
|
|
fs_reg temp0 = ibld.vgrf(get_exec_type(inst));
|
|
|
|
assert(inst->size_written == inst->dst.component_size(inst->exec_size));
|
|
inst->dst = temp0;
|
|
/* As it is an strided destination, we write n-times more being n the
|
|
* size ratio between source and destination types. Update
|
|
* size_written accordingly.
|
|
*/
|
|
inst->size_written = inst->dst.component_size(inst->exec_size);
|
|
inst->saturate = false;
|
|
/* Now, do the conversion to original destination's type. In next iteration,
|
|
* we will lower it if it is a d2f conversion.
|
|
*/
|
|
ibld.at(block, inst->next).MOV(dst, temp0)->saturate = saturate;
|
|
|
|
progress = true;
|
|
}
|
|
}
|
|
|
|
if (progress)
|
|
invalidate_live_intervals();
|
|
|
|
return progress;
|
|
}
|