mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-01 14:00:16 +01:00
intel/compiler: Signed integer range analysis for imul_32x16 generation
Only iabs and ineg are treated specially. Everything else just uses nir_unsigned_upper_bound. The special treatment of source modifiers is because they cause problems for nir_unsigned_upper_bound. Once those are peeled off, nir_unsigned_upper_bound can generally produce a tighter bound. Future commits will add more opcodes. This mostly introduces the basic framework. v2: Add a bunch of comments to signed_integer_range_analysis. Re-arrange the code a little to reduce duplication. Both suggested by Caio. Rearrange some logic to simplify things. Suggested by Marcin. Tiger Lake, Ice Lake, Haswell, and Ivy Bridge had similar results. (Ice Lake shown) total instructions in shared programs: 19912894 -> 19912558 (<.01%) instructions in affected programs: 109275 -> 108939 (-0.31%) helped: 74 / HURT: 0 total cycles in shared programs: 856422769 -> 856413218 (<.01%) cycles in affected programs: 15268102 -> 15258551 (-0.06%) helped: 65 / HURT: 4 total fills in shared programs: 8218 -> 8217 (-0.01%) fills in affected programs: 1171 -> 1170 (-0.09%) helped: 1 / HURT: 0 Skylake and Broadwell had similar results. 
(Skylake shown) total cycles in shared programs: 845145547 -> 845142263 (<.01%) cycles in affected programs: 15261465 -> 15258181 (-0.02%) helped: 65 / HURT: 0 Tiger Lake Tiger Lake Instructions in all programs: 157580768 -> 157579730 (-0.0%) Instructions helped: 312 Instructions hurt: 28 Cycles in all programs: 7566977172 -> 7566967746 (-0.0%) Cycles helped: 288 Cycles hurt: 53 Spills in all programs: 19701 -> 19700 (-0.0%) Spills helped: 2 Spills hurt: 4 Fills in all programs: 33311 -> 33335 (+0.1%) Fills helped: 5 Fills hurt: 4 Ice Lake Instructions in all programs: 141998667 -> 141997227 (-0.0%) Instructions helped: 420 Instructions hurt: 3 Cycles in all programs: 9162565297 -> 9162524757 (-0.0%) Cycles helped: 389 Cycles hurt: 29 Spills in all programs: 19918 -> 19916 (-0.0%) Spills helped: 2 Spills hurt: 3 Fills in all programs: 32795 -> 32814 (+0.1%) Fills helped: 6 Fills hurt: 3 Skylake Instructions in all programs: 132567691 -> 132567745 (+0.0%) Instructions hurt: 24 Cycles in all programs: 8828897462 -> 8828889517 (-0.0%) Cycles helped: 405 Cycles hurt: 6 Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17718>
This commit is contained in:
parent
f90d71055b
commit
1b0da3a765
1 changed files with 175 additions and 7 deletions
|
|
@ -28,6 +28,10 @@
|
|||
* Implement a peephole pass to convert integer multiplications to imul32x16.
|
||||
*/
|
||||
|
||||
/**
 * State handed to the per-instruction callback of the imul32x16 peephole
 * pass through its opaque callback-data pointer.
 */
struct pass_data {
   /** Table forwarded as the range_ht argument of nir_unsigned_upper_bound. */
   struct hash_table *range_ht;
};
|
||||
|
||||
static void
|
||||
replace_imul_instr(nir_builder *b, nir_alu_instr *imul, unsigned small_val,
|
||||
nir_op new_opcode)
|
||||
|
|
@ -56,11 +60,106 @@ replace_imul_instr(nir_builder *b, nir_alu_instr *imul, unsigned small_val,
|
|||
nir_instr_free(&imul->instr);
|
||||
}
|
||||
|
||||
/**
 * Unary operation(s) found at the root of a multiplication source while
 * performing signed range analysis.
 *
 * integer_neg and integer_abs occupy separate bits, so a negation can be
 * toggled with XOR without disturbing the absolute-value bit.  The numeric
 * ordering matters to callers that compare values with '<': non_unary is the
 * smallest value and invalid_root is larger than every valid value.
 */
enum root_operation {
   non_unary       = 0x00,
   integer_neg     = 0x01,
   integer_abs     = 0x02,
   integer_neg_abs = 0x03,  /* integer_neg | integer_abs */
   invalid_root    = 0xff
};
|
||||
|
||||
static enum root_operation
|
||||
signed_integer_range_analysis(nir_shader *shader, struct hash_table *range_ht,
|
||||
nir_ssa_scalar scalar, int *lo, int *hi)
|
||||
{
|
||||
if (nir_ssa_scalar_is_const(scalar)) {
|
||||
*lo = nir_ssa_scalar_as_int(scalar);
|
||||
*hi = *lo;
|
||||
return non_unary;
|
||||
}
|
||||
|
||||
if (nir_ssa_scalar_is_alu(scalar)) {
|
||||
switch (nir_ssa_scalar_alu_op(scalar)) {
|
||||
case nir_op_iabs:
|
||||
signed_integer_range_analysis(shader, range_ht,
|
||||
nir_ssa_scalar_chase_alu_src(scalar, 0),
|
||||
lo, hi);
|
||||
|
||||
if (*lo == INT32_MIN) {
|
||||
*hi = INT32_MAX;
|
||||
} else {
|
||||
const int32_t a = abs(*lo);
|
||||
const int32_t b = abs(*hi);
|
||||
|
||||
*lo = MIN2(a, b);
|
||||
*hi = MAX2(a, b);
|
||||
}
|
||||
|
||||
/* Absolute value wipes out any inner negations, and it is redundant
|
||||
* with any inner absolute values.
|
||||
*/
|
||||
return integer_abs;
|
||||
|
||||
case nir_op_ineg: {
|
||||
const enum root_operation root =
|
||||
signed_integer_range_analysis(shader, range_ht,
|
||||
nir_ssa_scalar_chase_alu_src(scalar, 0),
|
||||
lo, hi);
|
||||
|
||||
if (*lo == INT32_MIN) {
|
||||
*hi = INT32_MAX;
|
||||
} else {
|
||||
const int32_t a = -(*lo);
|
||||
const int32_t b = -(*hi);
|
||||
|
||||
*lo = MIN2(a, b);
|
||||
*hi = MAX2(a, b);
|
||||
}
|
||||
|
||||
/* Negation of a negation cancels out, but negation of absolute value
|
||||
* must preserve the integer_abs bit.
|
||||
*/
|
||||
return root ^ integer_neg;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Any value with the sign-bit set is problematic. Consider the case when
|
||||
* bound is 0x80000000. As an unsigned value, this means the value must be
|
||||
* in the range [0, 0x80000000]. As a signed value, it means the value must
|
||||
* be in the range [0, INT_MAX] or it must be INT_MIN.
|
||||
*
|
||||
* If bound is -2, it means the value is either in the range [INT_MIN, -2]
|
||||
* or it is in the range [0, INT_MAX].
|
||||
*
|
||||
* This function only returns a single, contiguous range. The union of the
|
||||
* two ranges for any value of bound with the sign-bit set is [INT_MIN,
|
||||
* INT_MAX].
|
||||
*/
|
||||
const int32_t bound = nir_unsigned_upper_bound(shader, range_ht,
|
||||
scalar, NULL);
|
||||
if (bound < 0) {
|
||||
*lo = INT32_MIN;
|
||||
*hi = INT32_MAX;
|
||||
} else {
|
||||
*lo = 0;
|
||||
*hi = bound;
|
||||
}
|
||||
|
||||
return non_unary;
|
||||
}
|
||||
|
||||
static bool
|
||||
brw_nir_opt_peephole_imul32x16_instr(nir_builder *b,
|
||||
nir_instr *instr,
|
||||
UNUSED void *cb_data)
|
||||
void *cb_data)
|
||||
{
|
||||
struct pass_data *d = (struct pass_data *) cb_data;
|
||||
struct hash_table *range_ht = d->range_ht;
|
||||
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
return false;
|
||||
|
||||
|
|
@ -105,16 +204,85 @@ brw_nir_opt_peephole_imul32x16_instr(nir_builder *b,
|
|||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
if (imul->dest.dest.ssa.num_components > 1)
|
||||
return false;
|
||||
|
||||
const nir_ssa_scalar imul_scalar = { &imul->dest.dest.ssa, 0 };
|
||||
int idx = -1;
|
||||
enum root_operation prev_root = invalid_root;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
/* All constants were previously processed. There is nothing more to
|
||||
* learn from a constant here.
|
||||
*/
|
||||
if (imul->src[i].src.ssa->parent_instr->type == nir_instr_type_load_const)
|
||||
continue;
|
||||
|
||||
nir_ssa_scalar scalar = nir_ssa_scalar_chase_alu_src(imul_scalar, i);
|
||||
int lo = INT32_MIN;
|
||||
int hi = INT32_MAX;
|
||||
|
||||
const enum root_operation root =
|
||||
signed_integer_range_analysis(b->shader, range_ht, scalar, &lo, &hi);
|
||||
|
||||
/* Copy propagation (in the backend) has trouble handling cases like
|
||||
*
|
||||
* mov(8) g60<1>D -g59<8,8,1>D
|
||||
* mul(8) g61<1>D g63<8,8,1>D g60<16,8,2>W
|
||||
*
|
||||
* If g59 had absolute value instead of negation, even improved copy
|
||||
* propagation would not be able to make progress.
|
||||
*
|
||||
* In cases where both sources to the integer multiplication can fit in
|
||||
* 16-bits, choose the source that does not have a source modifier.
|
||||
*/
|
||||
if (root < prev_root) {
|
||||
if (lo >= INT16_MIN && hi <= INT16_MAX) {
|
||||
new_opcode = nir_op_imul_32x16;
|
||||
idx = i;
|
||||
prev_root = root;
|
||||
|
||||
if (root == non_unary)
|
||||
break;
|
||||
} else if (lo >= 0 && hi <= UINT16_MAX) {
|
||||
new_opcode = nir_op_umul_32x16;
|
||||
idx = i;
|
||||
prev_root = root;
|
||||
|
||||
if (root == non_unary)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (new_opcode == nir_num_opcodes) {
|
||||
assert(idx == -1);
|
||||
assert(prev_root == invalid_root);
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(idx != -1);
|
||||
assert(prev_root != invalid_root);
|
||||
|
||||
replace_imul_instr(b, imul, idx, new_opcode);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_opt_peephole_imul32x16(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader,
|
||||
brw_nir_opt_peephole_imul32x16_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
struct pass_data cb_data;
|
||||
|
||||
cb_data.range_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
|
||||
bool progress = nir_shader_instructions_pass(shader,
|
||||
brw_nir_opt_peephole_imul32x16_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
&cb_data);
|
||||
|
||||
_mesa_hash_table_destroy(cb_data.range_ht, NULL);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue