intel/fs: Use a strided MOV instead of a conversion for load_* destinations

In many cases, the compiler can simply copy-propagate the strided MOV, whereas
the conversion is a bit trickier.  This cuts 5% of the instructions from
one particular Vulkan CTS test which does lots of load_ssbo.

Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Jason Ekstrand 2019-07-13 18:35:20 -05:00
parent 812b341578
commit 7ceec21b76

View file

@ -4018,7 +4018,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
fs_reg read_result = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.emit(SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
read_result, srcs, SURFACE_LOGICAL_NUM_SRCS);
bld.MOV(dest, read_result);
bld.MOV(dest, subscript(read_result, dest.type, 0));
}
break;
}
@ -4644,15 +4644,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
const unsigned bit_size = nir_dest_bit_size(instr->dest);
assert(bit_size <= 32);
assert(nir_dest_num_components(instr->dest) == 1);
brw_reg_type data_type =
brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.emit(SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
tmp,
get_nir_src(instr->src[0]), /* Address */
fs_reg(), /* No source data */
brw_imm_ud(bit_size));
bld.MOV(retype(dest, data_type), tmp);
bld.MOV(dest, subscript(tmp, dest.type, 0));
}
break;
}
@ -4755,7 +4753,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg read_result = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.emit(SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
read_result, srcs, SURFACE_LOGICAL_NUM_SRCS);
bld.MOV(dest, read_result);
bld.MOV(dest, subscript(read_result, dest.type, 0));
}
break;
}