mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 13:10:10 +01:00
spir-v: Mark floating point comparisons exact
OpenGL GLSL, OpenGL ARB assembly shaders, and DX9 are pretty loose about the behavior in the presence of NaNs. Many GPUs that implement these specifications do not even have a representation of NaN. However, OpenCL and Vulkan SPIR-V are not so lax. Both actually have some required behavior in the presence of NaN, and, of the two, OpenCL is the most strict. For years we have implemented SPIR-V by using the same comparison opcodes as we use for OpenGL GLSL and OpenGL assembly shaders. This has repeatedly caused problems where an optimization that is valid in the NaN-relaxed world is not valid in Vulkan or OpenCL. To fix this, set the "exact" flag on comparisons instructions generated from SPIR-V. This will block optimizations that may have different NaN behavior. v2: Set the exact flag in the nir_builder, not in the vtn_builder. v3: Add an assertion in vtn_handle_constant that the exact flag wasn't set (because it's ignored). Rebase on80163bbec3("nir/vtn: Support OpOrdered and OpUnordered opcodes"). Mark the NIR generated for those opcodes as exact as well. v4: s/unused_exact/exact/ in a couple places, and assert that exact has the expected value (true in one place, false in the other). Suggested by Caio. Closes: #3345 Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> Tested-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com> Fixes:8513b12590("nir/opt_if: split ALU from Phi more aggressively") This commit doesn't really fix anything in8513b12590. However, without8513b12590, a regression is triggered in RADV on No Man's Sky. I want to ensure that this change is only applied on top of8513b12590, and Fixes: seems the safest way to do that. No shader-db changes on any Intel platform. This only affects SPIR-V, and we have no OpenGL SPIR-V shaders in shader-db. 124 shaders in Shadow of the Tomb Raider (Steam "native") were hurt by 1 spill and 1 fill each. All Intel platforms had similar results. (Tiger Lake shown) Instructions in all programs: 155668276 -> 155685764 (+0.0%) SENDs in all programs: 6474570 -> 6474570 (+0.0%) Loops in all programs: 35271 -> 35271 (+0.0%) Cycles in all programs: 3198055373 -> 3198628031 (+0.0%) Spills in all programs: 231522 -> 231646 (+0.1%) Fills in all programs: 347571 -> 347695 (+0.0%) Vega Totals: SGPRs: 20955712 -> 20956756 (+0.00%); split: -0.02%, +0.03% VGPRs: 13476920 -> 13473132 (-0.03%); split: -0.07%, +0.04% CodeSize: 613371940 -> 613339348 (-0.01%); split: -0.06%, +0.05% MaxWaves: 3111886 -> 3112481 (+0.02%); split: +0.02%, -0.00% Instrs: 120723785 -> 120746991 (+0.02%); split: -0.04%, +0.06% Cycles: 626658992 -> 626862708 (+0.03%); split: -0.05%, +0.08% VMEM: 216330854 -> 216343196 (+0.01%); split: +0.04%, -0.04% SMEM: 32079391 -> 32081972 (+0.01%); split: +0.05%, -0.04% VClause: 2688784 -> 2688789 (+0.00%); split: -0.03%, +0.03% SClause: 6554669 ->6556251(+0.02%); split: -0.01%, +0.03% Copies: 5356667 -> 5353283 (-0.06%); split: -0.36%, +0.29% Branches: 954466 -> 954716 (+0.03%); split: -0.01%, +0.04% PreSGPRs: 9078300 -> 9081626 (+0.04%); split: -0.01%, +0.05% PreVGPRs: 10972090 -> 10966576 (-0.05%); split: -0.06%, +0.01% Totals from 48239 (12.08% of 399432) affected shaders: SGPRs: 2713984 -> 2715028 (+0.04%); split: -0.16%, +0.19% VGPRs: 1997804 -> 1994016 (-0.19%); split: -0.46%, +0.27% CodeSize: 172094092 -> 172061500 (-0.02%); split: -0.21%, +0.19% MaxWaves: 337327 -> 337922 (+0.18%); split: +0.20%, -0.02% Instrs: 33053657 -> 33076863 (+0.07%); split: -0.15%, +0.22% Cycles: 254961228 -> 255164944 (+0.08%); split: -0.12%, +0.20% VMEM: 15165226 -> 15177568 (+0.08%); split: +0.59%, -0.51% SMEM: 3304938 -> 3307519 (+0.08%); split: +0.49%, -0.41% VClause: 766225 -> 766230 (+0.00%); split: -0.12%, +0.12% SClause: 1332645 -> 1334227 (+0.12%); split: -0.04%, +0.16% Copies: 2040651 -> 2037267 (-0.17%); split: -0.94%, +0.77% Branches: 743668 -> 743918 (+0.03%); split: -0.01%, +0.05% PreSGPRs: 1697667 -> 1700993 (+0.20%); split: -0.07%, +0.27% PreVGPRs: 1718424 -> 1712910 (-0.32%); split: -0.39%, +0.07% Polaris Totals: SGPRs: 21349172 -> 21354376 (+0.02%); split: -0.02%, +0.04% VGPRs: 13690680 -> 13686920 (-0.03%); split: -0.07%, +0.04% CodeSize: 613745824 -> 613704988 (-0.01%); split: -0.06%, +0.05% MaxWaves: 2775012 -> 2775189 (+0.01%); split: +0.01%, -0.00% Instrs: 120735079 -> 120756209 (+0.02%); split: -0.04%, +0.06% Cycles: 627906100 -> 628076156 (+0.03%); split: -0.05%, +0.08% VMEM: 216623065 -> 216641838 (+0.01%); split: +0.04%, -0.04% SMEM: 32295618 -> 32299338 (+0.01%); split: +0.05%, -0.04% VClause: 2711025 -> 2711141 (+0.00%); split: -0.03%, +0.04% SClause: 6545185 -> 6546769 (+0.02%); split: -0.01%, +0.03% Copies: 5387723 -> 5383249 (-0.08%); split: -0.37%, +0.29% Branches: 953775 -> 953954 (+0.02%); split: -0.01%, +0.03% PreSGPRs: 9148814 -> 9153211 (+0.05%); split: -0.01%, +0.06% PreVGPRs: 11029429 -> 11023915 (-0.05%); split: -0.06%, +0.01% Totals from 48239 (12.00% of 402052) affected shaders: SGPRs: 2682056 -> 2687260 (+0.19%); split: -0.16%, +0.35% VGPRs: 1994436 -> 1990676 (-0.19%); split: -0.46%, +0.27% CodeSize: 170857060 -> 170816224 (-0.02%); split: -0.21%, +0.19% MaxWaves: 295429 -> 295606 (+0.06%); split: +0.07%, -0.01% Instrs: 32808802 -> 32829932 (+0.06%); split: -0.16%, +0.22% Cycles: 254633252 -> 254803308 (+0.07%); split: -0.13%, +0.20% VMEM: 14897934 -> 14916707 (+0.13%); split: +0.65%, -0.52% SMEM: 3289726 -> 3293446 (+0.11%); split: +0.53%, -0.42% VClause: 775318 -> 775434 (+0.01%); split: -0.11%, +0.13% SClause: 1304867 -> 1306451 (+0.12%); split: -0.04%, +0.16% Copies: 2026334 -> 2021860 (-0.22%); split: -0.99%, +0.77% Branches: 742554 -> 742733 (+0.02%); split: -0.02%, +0.04% PreSGPRs: 1690887 -> 1695284 (+0.26%); split: -0.07%, +0.33% PreVGPRs: 1717709 -> 1712195 (-0.32%); split: -0.40%, +0.07% Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6358>
This commit is contained in:
parent
55621c6d1c
commit
010e663cc3
3 changed files with 83 additions and 31 deletions
|
|
@ -2094,9 +2094,16 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
|
|||
bit_size = glsl_get_bit_size(val->type->type);
|
||||
};
|
||||
|
||||
nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
|
||||
bool exact;
|
||||
nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact,
|
||||
nir_alu_type_get_type_size(src_alu_type),
|
||||
nir_alu_type_get_type_size(dst_alu_type));
|
||||
|
||||
/* No SPIR-V opcodes handled through this path should set exact.
|
||||
* Since it is ignored, assert on it.
|
||||
*/
|
||||
assert(!exact);
|
||||
|
||||
nir_const_value src[3][NIR_MAX_VEC_COMPONENTS];
|
||||
|
||||
for (unsigned i = 0; i < count - 4; i++) {
|
||||
|
|
|
|||
|
|
@ -254,7 +254,7 @@ convert_op_dst_type(SpvOp opcode)
|
|||
|
||||
nir_op
|
||||
vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
|
||||
SpvOp opcode, bool *swap,
|
||||
SpvOp opcode, bool *swap, bool *exact,
|
||||
unsigned src_bit_size, unsigned dst_bit_size)
|
||||
{
|
||||
/* Indicates that the first two arguments should be swapped. This is
|
||||
|
|
@ -262,6 +262,8 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
|
|||
*/
|
||||
*swap = false;
|
||||
|
||||
*exact = false;
|
||||
|
||||
switch (opcode) {
|
||||
case SpvOpSNegate: return nir_op_ineg;
|
||||
case SpvOpFNegate: return nir_op_fneg;
|
||||
|
|
@ -319,28 +321,28 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
|
|||
* the logical operator to use since they also need to check if operands are
|
||||
* ordered.
|
||||
*/
|
||||
case SpvOpFOrdEqual: return nir_op_feq;
|
||||
case SpvOpFUnordEqual: return nir_op_feq;
|
||||
case SpvOpINotEqual: return nir_op_ine;
|
||||
case SpvOpFOrdEqual: *exact = true; return nir_op_feq;
|
||||
case SpvOpFUnordEqual: *exact = true; return nir_op_feq;
|
||||
case SpvOpINotEqual: return nir_op_ine;
|
||||
case SpvOpLessOrGreater: /* Deprecated, use OrdNotEqual */
|
||||
case SpvOpFOrdNotEqual: return nir_op_fneu;
|
||||
case SpvOpFUnordNotEqual: return nir_op_fneu;
|
||||
case SpvOpULessThan: return nir_op_ult;
|
||||
case SpvOpSLessThan: return nir_op_ilt;
|
||||
case SpvOpFOrdLessThan: return nir_op_flt;
|
||||
case SpvOpFUnordLessThan: return nir_op_flt;
|
||||
case SpvOpUGreaterThan: *swap = true; return nir_op_ult;
|
||||
case SpvOpSGreaterThan: *swap = true; return nir_op_ilt;
|
||||
case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt;
|
||||
case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt;
|
||||
case SpvOpULessThanEqual: *swap = true; return nir_op_uge;
|
||||
case SpvOpSLessThanEqual: *swap = true; return nir_op_ige;
|
||||
case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge;
|
||||
case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge;
|
||||
case SpvOpUGreaterThanEqual: return nir_op_uge;
|
||||
case SpvOpSGreaterThanEqual: return nir_op_ige;
|
||||
case SpvOpFOrdGreaterThanEqual: return nir_op_fge;
|
||||
case SpvOpFUnordGreaterThanEqual: return nir_op_fge;
|
||||
case SpvOpFOrdNotEqual: *exact = true; return nir_op_fneu;
|
||||
case SpvOpFUnordNotEqual: *exact = true; return nir_op_fneu;
|
||||
case SpvOpULessThan: return nir_op_ult;
|
||||
case SpvOpSLessThan: return nir_op_ilt;
|
||||
case SpvOpFOrdLessThan: *exact = true; return nir_op_flt;
|
||||
case SpvOpFUnordLessThan: *exact = true; return nir_op_flt;
|
||||
case SpvOpUGreaterThan: *swap = true; return nir_op_ult;
|
||||
case SpvOpSGreaterThan: *swap = true; return nir_op_ilt;
|
||||
case SpvOpFOrdGreaterThan: *swap = true; *exact = true; return nir_op_flt;
|
||||
case SpvOpFUnordGreaterThan: *swap = true; *exact = true; return nir_op_flt;
|
||||
case SpvOpULessThanEqual: *swap = true; return nir_op_uge;
|
||||
case SpvOpSLessThanEqual: *swap = true; return nir_op_ige;
|
||||
case SpvOpFOrdLessThanEqual: *swap = true; *exact = true; return nir_op_fge;
|
||||
case SpvOpFUnordLessThanEqual: *swap = true; *exact = true; return nir_op_fge;
|
||||
case SpvOpUGreaterThanEqual: return nir_op_uge;
|
||||
case SpvOpSGreaterThanEqual: return nir_op_ige;
|
||||
case SpvOpFOrdGreaterThanEqual: *exact = true; return nir_op_fge;
|
||||
case SpvOpFUnordGreaterThanEqual: *exact = true; return nir_op_fge;
|
||||
|
||||
/* Conversions: */
|
||||
case SpvOpQuantizeToF16: return nir_op_fquantize2f16;
|
||||
|
|
@ -554,19 +556,34 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
dest->def = nir_fmul(&b->nb, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case SpvOpIsNan:
|
||||
dest->def = nir_fneu(&b->nb, src[0], src[0]);
|
||||
break;
|
||||
case SpvOpIsNan: {
|
||||
const bool save_exact = b->nb.exact;
|
||||
|
||||
case SpvOpOrdered:
|
||||
b->nb.exact = true;
|
||||
dest->def = nir_fneu(&b->nb, src[0], src[0]);
|
||||
b->nb.exact = save_exact;
|
||||
break;
|
||||
}
|
||||
|
||||
case SpvOpOrdered: {
|
||||
const bool save_exact = b->nb.exact;
|
||||
|
||||
b->nb.exact = true;
|
||||
dest->def = nir_iand(&b->nb, nir_feq(&b->nb, src[0], src[0]),
|
||||
nir_feq(&b->nb, src[1], src[1]));
|
||||
b->nb.exact = save_exact;
|
||||
break;
|
||||
}
|
||||
|
||||
case SpvOpUnordered:
|
||||
case SpvOpUnordered: {
|
||||
const bool save_exact = b->nb.exact;
|
||||
|
||||
b->nb.exact = true;
|
||||
dest->def = nir_ior(&b->nb, nir_fneu(&b->nb, src[0], src[0]),
|
||||
nir_fneu(&b->nb, src[1], src[1]));
|
||||
b->nb.exact = save_exact;
|
||||
break;
|
||||
}
|
||||
|
||||
case SpvOpIsInf: {
|
||||
nir_ssa_def *inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size);
|
||||
|
|
@ -581,9 +598,11 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
case SpvOpFUnordLessThanEqual:
|
||||
case SpvOpFUnordGreaterThanEqual: {
|
||||
bool swap;
|
||||
bool unused_exact;
|
||||
unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type);
|
||||
unsigned dst_bit_size = glsl_get_bit_size(dest_type);
|
||||
nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
|
||||
&unused_exact,
|
||||
src_bit_size, dst_bit_size);
|
||||
|
||||
if (swap) {
|
||||
|
|
@ -592,12 +611,18 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
src[1] = tmp;
|
||||
}
|
||||
|
||||
const bool save_exact = b->nb.exact;
|
||||
|
||||
b->nb.exact = true;
|
||||
|
||||
dest->def =
|
||||
nir_ior(&b->nb,
|
||||
nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL),
|
||||
nir_ior(&b->nb,
|
||||
nir_fneu(&b->nb, src[0], src[0]),
|
||||
nir_fneu(&b->nb, src[1], src[1])));
|
||||
|
||||
b->nb.exact = save_exact;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -608,12 +633,18 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
* ordered so we don’t need to handle it specially.
|
||||
*/
|
||||
bool swap;
|
||||
bool exact;
|
||||
unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type);
|
||||
unsigned dst_bit_size = glsl_get_bit_size(dest_type);
|
||||
nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
|
||||
nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact,
|
||||
src_bit_size, dst_bit_size);
|
||||
|
||||
assert(!swap);
|
||||
assert(exact);
|
||||
|
||||
const bool save_exact = b->nb.exact;
|
||||
|
||||
b->nb.exact = true;
|
||||
|
||||
dest->def =
|
||||
nir_iand(&b->nb,
|
||||
|
|
@ -621,6 +652,8 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
nir_iand(&b->nb,
|
||||
nir_feq(&b->nb, src[0], src[0]),
|
||||
nir_feq(&b->nb, src[1], src[1])));
|
||||
|
||||
b->nb.exact = save_exact;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -676,11 +709,14 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
case SpvOpShiftRightArithmetic:
|
||||
case SpvOpShiftRightLogical: {
|
||||
bool swap;
|
||||
bool exact;
|
||||
unsigned src0_bit_size = glsl_get_bit_size(vtn_src[0]->type);
|
||||
unsigned dst_bit_size = glsl_get_bit_size(dest_type);
|
||||
nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
|
||||
nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact,
|
||||
src0_bit_size, dst_bit_size);
|
||||
|
||||
assert(!exact);
|
||||
|
||||
assert (op == nir_op_ushr || op == nir_op_ishr || op == nir_op_ishl ||
|
||||
op == nir_op_bitfield_insert || op == nir_op_ubitfield_extract ||
|
||||
op == nir_op_ibitfield_extract);
|
||||
|
|
@ -725,9 +761,11 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
|
||||
default: {
|
||||
bool swap;
|
||||
bool exact;
|
||||
unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type);
|
||||
unsigned dst_bit_size = glsl_get_bit_size(dest_type);
|
||||
nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
|
||||
&exact,
|
||||
src_bit_size, dst_bit_size);
|
||||
|
||||
if (swap) {
|
||||
|
|
@ -747,7 +785,14 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
break;
|
||||
}
|
||||
|
||||
const bool save_exact = b->nb.exact;
|
||||
|
||||
if (exact)
|
||||
b->nb.exact = true;
|
||||
|
||||
dest->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
|
||||
|
||||
b->nb.exact = save_exact;
|
||||
break;
|
||||
} /* default */
|
||||
}
|
||||
|
|
|
|||
|
|
@ -907,7 +907,7 @@ void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
|
|||
vtn_execution_mode_foreach_cb cb, void *data);
|
||||
|
||||
nir_op vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
|
||||
SpvOp opcode, bool *swap,
|
||||
SpvOp opcode, bool *swap, bool *exact,
|
||||
unsigned src_bit_size, unsigned dst_bit_size);
|
||||
|
||||
void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue